Bug 418702 - ARMv8.1 Paired register compare-and-swap instructions are not supported.
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26    The GNU General Public License is contained in the file COPYING.  */
29 #include "libvex_basictypes.h"
30 #include "libvex.h"
31 #include "libvex_trc_values.h"
33 #include "main_util.h"
34 #include "host_generic_regs.h"
35 #include "host_arm64_defs.h"
38 /* --------- Registers. --------- */
40 /* The usual HReg abstraction. We use the following classes only:
41 X regs (64 bit int)
42 D regs (64 bit float, also used for 32 bit float)
43      Q regs (128 bit vector)  */
46 const RRegUniverse* getRRegUniverse_ARM64 ( void )
48 /* The real-register universe is a big constant, so we just want to
49 initialise it once. */
50 static RRegUniverse rRegUniverse_ARM64;
51 static Bool rRegUniverse_ARM64_initted = False;
53 /* Handy shorthand, nothing more */
54 RRegUniverse* ru = &rRegUniverse_ARM64;
56 /* This isn't thread-safe. Sigh. */
57 if (LIKELY(rRegUniverse_ARM64_initted))
58 return ru;
60 RRegUniverse__init(ru);
62 /* Add the registers. The initial segment of this array must be
63 those available for allocation by reg-alloc, and those that
64 follow are not available for allocation. */
65 ru->allocable_start[HRcInt64] = ru->size;
66 ru->regs[ru->size++] = hregARM64_X22();
67 ru->regs[ru->size++] = hregARM64_X23();
68 ru->regs[ru->size++] = hregARM64_X24();
69 ru->regs[ru->size++] = hregARM64_X25();
70 ru->regs[ru->size++] = hregARM64_X26();
71 ru->regs[ru->size++] = hregARM64_X27();
72 ru->regs[ru->size++] = hregARM64_X28();
74 ru->regs[ru->size++] = hregARM64_X0();
75 ru->regs[ru->size++] = hregARM64_X1();
76 ru->regs[ru->size++] = hregARM64_X2();
77 ru->regs[ru->size++] = hregARM64_X3();
78 ru->regs[ru->size++] = hregARM64_X4();
79 ru->regs[ru->size++] = hregARM64_X5();
80 ru->regs[ru->size++] = hregARM64_X6();
81 ru->regs[ru->size++] = hregARM64_X7();
82 ru->allocable_end[HRcInt64] = ru->size - 1;
83 // X8 is used as a ProfInc temporary, not available to regalloc.
84 // X9 is a chaining/spill temporary, not available to regalloc.
86 // Do we really need all these?
87 //ru->regs[ru->size++] = hregARM64_X10();
88 //ru->regs[ru->size++] = hregARM64_X11();
89 //ru->regs[ru->size++] = hregARM64_X12();
90 //ru->regs[ru->size++] = hregARM64_X13();
91 //ru->regs[ru->size++] = hregARM64_X14();
92 //ru->regs[ru->size++] = hregARM64_X15();
93 // X21 is the guest state pointer, not available to regalloc.
95 // vector regs. Unfortunately not callee-saved.
96 ru->allocable_start[HRcVec128] = ru->size;
97 ru->regs[ru->size++] = hregARM64_Q16();
98 ru->regs[ru->size++] = hregARM64_Q17();
99 ru->regs[ru->size++] = hregARM64_Q18();
100 ru->regs[ru->size++] = hregARM64_Q19();
101 ru->regs[ru->size++] = hregARM64_Q20();
102 ru->allocable_end[HRcVec128] = ru->size - 1;
104 // F64 regs, all of which are callee-saved
105 ru->allocable_start[HRcFlt64] = ru->size;
106 ru->regs[ru->size++] = hregARM64_D8();
107 ru->regs[ru->size++] = hregARM64_D9();
108 ru->regs[ru->size++] = hregARM64_D10();
109 ru->regs[ru->size++] = hregARM64_D11();
110 ru->regs[ru->size++] = hregARM64_D12();
111 ru->regs[ru->size++] = hregARM64_D13();
112 ru->allocable_end[HRcFlt64] = ru->size - 1;
114 ru->allocable = ru->size;
115 /* And other regs, not available to the allocator. */
117 // unavail: x21 as GSP
118 // x8 is used as a ProfInc temporary
119 // x9 is used as a spill/reload/chaining/call temporary
120 // x30 as LR
121 // x31 because dealing with the SP-vs-ZR overloading is too
122 // confusing, and we don't need to do so, so let's just avoid
123 // the problem
125 // Currently, we have 15 allocatable integer registers:
126 // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
128 // Hence for the allocatable integer registers we have:
130 // callee-saved: 22 23 24 25 26 27 28
131 // caller-saved: 0 1 2 3 4 5 6 7
133 // If the set of available registers changes or if the e/r status
134 // changes, be sure to re-check/sync the definition of
135 // getRegUsage for ARM64Instr_Call too.
137 ru->regs[ru->size++] = hregARM64_X8();
138 ru->regs[ru->size++] = hregARM64_X9();
139 ru->regs[ru->size++] = hregARM64_X21();
141 rRegUniverse_ARM64_initted = True;
143 RRegUniverse__check_is_sane(ru);
144 return ru;
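/* Illustrative sketch (not part of the build): how a backend caller
   typically consumes this universe.  The caller shown here is an
   assumption, not something defined in this file.

      const RRegUniverse* univ = getRRegUniverse_ARM64();
      // univ->regs[0 .. univ->allocable-1] may be assigned to vregs;
      // the trailing entries (x8, x9, x21 above) are listed only so
      // that clobbers of them can be tracked.
*/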
148 UInt ppHRegARM64 ( HReg reg ) {
149 Int r;
150 /* Be generic for all virtual regs. */
151 if (hregIsVirtual(reg)) {
152 return ppHReg(reg);
154 /* But specific for real regs. */
155 switch (hregClass(reg)) {
156 case HRcInt64:
157 r = hregEncoding(reg);
158 vassert(r >= 0 && r < 31);
159 return vex_printf("x%d", r);
160 case HRcFlt64:
161 r = hregEncoding(reg);
162 vassert(r >= 0 && r < 32);
163 return vex_printf("d%d", r);
164 case HRcVec128:
165 r = hregEncoding(reg);
166 vassert(r >= 0 && r < 32);
167 return vex_printf("q%d", r);
168 default:
169 vpanic("ppHRegARM64");
173 static UInt ppHRegARM64asSreg ( HReg reg ) {
174 UInt written = ppHRegARM64(reg);
175 written += vex_printf("(S-reg)");
176 return written;
179 static UInt ppHRegARM64asHreg ( HReg reg ) {
180 UInt written = ppHRegARM64(reg);
181 written += vex_printf("(H-reg)");
182 return written;
186 /* --------- Condition codes, ARM64 encoding. --------- */
188 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
189 switch (cond) {
190 case ARM64cc_EQ: return "eq";
191 case ARM64cc_NE: return "ne";
192 case ARM64cc_CS: return "cs";
193 case ARM64cc_CC: return "cc";
194 case ARM64cc_MI: return "mi";
195 case ARM64cc_PL: return "pl";
196 case ARM64cc_VS: return "vs";
197 case ARM64cc_VC: return "vc";
198 case ARM64cc_HI: return "hi";
199 case ARM64cc_LS: return "ls";
200 case ARM64cc_GE: return "ge";
201 case ARM64cc_LT: return "lt";
202 case ARM64cc_GT: return "gt";
203 case ARM64cc_LE: return "le";
204 case ARM64cc_AL: return "al"; // default
205 case ARM64cc_NV: return "nv";
206 default: vpanic("showARM64CondCode");
211 /* --------- Memory address expressions (amodes). --------- */
213 ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
214 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
215 am->tag = ARM64am_RI9;
216 am->ARM64am.RI9.reg = reg;
217 am->ARM64am.RI9.simm9 = simm9;
218 vassert(-256 <= simm9 && simm9 <= 255);
219 return am;
222 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
223 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
224 am->tag = ARM64am_RI12;
225 am->ARM64am.RI12.reg = reg;
226 am->ARM64am.RI12.uimm12 = uimm12;
227 am->ARM64am.RI12.szB = szB;
228 vassert(uimm12 >= 0 && uimm12 <= 4095);
229 switch (szB) {
230 case 1: case 2: case 4: case 8: break;
231 default: vassert(0);
233 return am;
236 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
237 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
238 am->tag = ARM64am_RR;
239 am->ARM64am.RR.base = base;
240 am->ARM64am.RR.index = index;
241 return am;
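/* Illustrative sketch (not part of the build): constructing the three
   addressing modes.  'rBase' and 'rIndex' are hypothetical 64-bit
   integer vregs.

      ARM64AMode* a1 = ARM64AMode_RI9 (rBase, -8);      // [rBase, #-8]
      ARM64AMode* a2 = ARM64AMode_RI12(rBase, 16, 8);   // [rBase, #16*8], i.e. byte offset 128 for 8-byte accesses
      ARM64AMode* a3 = ARM64AMode_RR  (rBase, rIndex);  // [rBase, rIndex]
*/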
244 static void ppARM64AMode ( ARM64AMode* am ) {
245 switch (am->tag) {
246 case ARM64am_RI9:
247 vex_printf("%d(", am->ARM64am.RI9.simm9);
248 ppHRegARM64(am->ARM64am.RI9.reg);
249 vex_printf(")");
250 break;
251 case ARM64am_RI12:
252 vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
253 * (UInt)am->ARM64am.RI12.uimm12);
254 ppHRegARM64(am->ARM64am.RI12.reg);
255 vex_printf(")");
256 break;
257 case ARM64am_RR:
258 vex_printf("(");
259 ppHRegARM64(am->ARM64am.RR.base);
260 vex_printf(",");
261 ppHRegARM64(am->ARM64am.RR.index);
262 vex_printf(")");
263 break;
264 default:
265 vassert(0);
269 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
270 switch (am->tag) {
271 case ARM64am_RI9:
272 addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
273 return;
274 case ARM64am_RI12:
275 addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
276 return;
277 case ARM64am_RR:
278 addHRegUse(u, HRmRead, am->ARM64am.RR.base);
279 addHRegUse(u, HRmRead, am->ARM64am.RR.index);
280 return;
281 default:
282 vpanic("addRegUsage_ARM64Amode");
286 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
287 switch (am->tag) {
288 case ARM64am_RI9:
289 am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
290 return;
291 case ARM64am_RI12:
292 am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
293 return;
294 case ARM64am_RR:
295 am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
296 am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
297 return;
298 default:
299 vpanic("mapRegs_ARM64Amode");
304 /* --------- Reg or uimm12<<{0,12} operands --------- */
306 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
307 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
308 riA->tag = ARM64riA_I12;
309 riA->ARM64riA.I12.imm12 = imm12;
310 riA->ARM64riA.I12.shift = shift;
311 vassert(imm12 < 4096);
312 vassert(shift == 0 || shift == 12);
313 return riA;
315 ARM64RIA* ARM64RIA_R ( HReg reg ) {
316 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
317 riA->tag = ARM64riA_R;
318 riA->ARM64riA.R.reg = reg;
319 return riA;
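/* Illustrative sketch (not part of the build): the two immediate forms
   this operand can carry, mirroring the AArch64 add/sub-immediate
   encoding (uimm12, optionally shifted left by 12).  'someReg' is a
   hypothetical integer vreg.

      ARM64RIA* lo = ARM64RIA_I12(42, 0);    // the constant 42
      ARM64RIA* hi = ARM64RIA_I12(5, 12);    // 5 << 12 = 0x5000
      ARM64RIA* rr = ARM64RIA_R(someReg);    // anything else goes via a register
*/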
322 static void ppARM64RIA ( ARM64RIA* riA ) {
323 switch (riA->tag) {
324 case ARM64riA_I12:
325 vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
326 << riA->ARM64riA.I12.shift));
327 break;
328 case ARM64riA_R:
329 ppHRegARM64(riA->ARM64riA.R.reg);
330 break;
331 default:
332 vassert(0);
336 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
337 switch (riA->tag) {
338 case ARM64riA_I12:
339 return;
340 case ARM64riA_R:
341 addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
342 return;
343 default:
344 vpanic("addRegUsage_ARM64RIA");
348 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
349 switch (riA->tag) {
350 case ARM64riA_I12:
351 return;
352 case ARM64riA_R:
353 riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
354 return;
355 default:
356 vpanic("mapRegs_ARM64RIA");
361 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
363 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
364 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
365 riL->tag = ARM64riL_I13;
366 riL->ARM64riL.I13.bitN = bitN;
367 riL->ARM64riL.I13.immR = immR;
368 riL->ARM64riL.I13.immS = immS;
369 vassert(bitN < 2);
370 vassert(immR < 64);
371 vassert(immS < 64);
372 return riL;
374 ARM64RIL* ARM64RIL_R ( HReg reg ) {
375 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
376 riL->tag = ARM64riL_R;
377 riL->ARM64riL.R.reg = reg;
378 return riL;
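/* Illustrative sketch (not part of the build), assuming (bitN,immR,immS)
   follow the standard AArch64 "bitmask immediate" scheme used by the
   logical (immediate) instructions.  'someReg' is a hypothetical
   integer vreg.

      ARM64RIL* m = ARM64RIL_I13(1, 0, 7);   // 64-bit pattern 0x00000000000000FF
      ARM64RIL* r = ARM64RIL_R(someReg);     // non-encodable masks (e.g. 0) go via a register
*/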
381 static void ppARM64RIL ( ARM64RIL* riL ) {
382 switch (riL->tag) {
383 case ARM64riL_I13:
384 vex_printf("#nrs(%u,%u,%u)",
385 (UInt)riL->ARM64riL.I13.bitN,
386 (UInt)riL->ARM64riL.I13.immR,
387 (UInt)riL->ARM64riL.I13.immS);
388 break;
389 case ARM64riL_R:
390 ppHRegARM64(riL->ARM64riL.R.reg);
391 break;
392 default:
393 vassert(0);
397 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
398 switch (riL->tag) {
399 case ARM64riL_I13:
400 return;
401 case ARM64riL_R:
402 addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
403 return;
404 default:
405 vpanic("addRegUsage_ARM64RIL");
409 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
410 switch (riL->tag) {
411 case ARM64riL_I13:
412 return;
413 case ARM64riL_R:
414 riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
415 return;
416 default:
417 vpanic("mapRegs_ARM64RIL");
422 /* --------------- Reg or uimm6 operands --------------- */
424 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
425 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
426 ri6->tag = ARM64ri6_I6;
427 ri6->ARM64ri6.I6.imm6 = imm6;
428 vassert(imm6 > 0 && imm6 < 64);
429 return ri6;
431 ARM64RI6* ARM64RI6_R ( HReg reg ) {
432 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
433 ri6->tag = ARM64ri6_R;
434 ri6->ARM64ri6.R.reg = reg;
435 return ri6;
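/* Illustrative sketch (not part of the build): immediate shift amounts
   of zero are deliberately excluded by the assertion above, so a no-op
   shift has to be expressed as a plain move or via the register form.
   'someReg' is a hypothetical integer vreg.

      ARM64RI6* sh = ARM64RI6_I6(3);        // shift by 3
      ARM64RI6* sr = ARM64RI6_R(someReg);   // shift amount held in a register
*/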
438 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
439 switch (ri6->tag) {
440 case ARM64ri6_I6:
441 vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
442 break;
443 case ARM64ri6_R:
444 ppHRegARM64(ri6->ARM64ri6.R.reg);
445 break;
446 default:
447 vassert(0);
451 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
452 switch (ri6->tag) {
453 case ARM64ri6_I6:
454 return;
455 case ARM64ri6_R:
456 addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
457 return;
458 default:
459 vpanic("addRegUsage_ARM64RI6");
463 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
464 switch (ri6->tag) {
465 case ARM64ri6_I6:
466 return;
467 case ARM64ri6_R:
468 ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
469 return;
470 default:
471 vpanic("mapRegs_ARM64RI6");
476 /* --------- Instructions. --------- */
478 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
479 switch (op) {
480 case ARM64lo_AND: return "and";
481 case ARM64lo_OR: return "orr";
482 case ARM64lo_XOR: return "eor";
483 default: vpanic("showARM64LogicOp");
487 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
488 switch (op) {
489 case ARM64sh_SHL: return "lsl";
490 case ARM64sh_SHR: return "lsr";
491 case ARM64sh_SAR: return "asr";
492 default: vpanic("showARM64ShiftOp");
496 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
497 switch (op) {
498 case ARM64un_NEG: return "neg";
499 case ARM64un_NOT: return "not";
500 case ARM64un_CLZ: return "clz";
501 default: vpanic("showARM64UnaryOp");
505 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
506 switch (op) {
507 case ARM64mul_PLAIN: return "mul ";
508 case ARM64mul_ZX: return "umulh";
509 case ARM64mul_SX: return "smulh";
510 default: vpanic("showARM64MulOp");
514 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
515 /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
516 ARM64CvtOp op ) {
517 switch (op) {
518 case ARM64cvt_F32_I32S:
519 *syn = 's'; *fszB = 4; *iszB = 4; break;
520 case ARM64cvt_F64_I32S:
521 *syn = 's'; *fszB = 8; *iszB = 4; break;
522 case ARM64cvt_F32_I64S:
523 *syn = 's'; *fszB = 4; *iszB = 8; break;
524 case ARM64cvt_F64_I64S:
525 *syn = 's'; *fszB = 8; *iszB = 8; break;
526 case ARM64cvt_F32_I32U:
527 *syn = 'u'; *fszB = 4; *iszB = 4; break;
528 case ARM64cvt_F64_I32U:
529 *syn = 'u'; *fszB = 8; *iszB = 4; break;
530 case ARM64cvt_F32_I64U:
531 *syn = 'u'; *fszB = 4; *iszB = 8; break;
532 case ARM64cvt_F64_I64U:
533 *syn = 'u'; *fszB = 8; *iszB = 8; break;
534 default:
535 vpanic("characteriseARM64CvtOp");
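/* Illustrative (not part of the build): how the characterisation maps
   back onto mnemonics in the printers further down, e.g.

      ARM64cvt_F64_I32S  ->  syn 's', fszB 8, iszB 4   ("scvtf Dd, Wn" / "fcvt?s Wd, Dn")
      ARM64cvt_F32_I64U  ->  syn 'u', fszB 4, iszB 8   ("ucvtf Sd, Xn" / "fcvt?u Xd, Sn")

   where '?' stands for the rounding-mode letter chosen at print/emit time.
*/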
539 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
540 switch (op) {
541 case ARM64fpb_ADD: return "add";
542 case ARM64fpb_SUB: return "sub";
543 case ARM64fpb_MUL: return "mul";
544 case ARM64fpb_DIV: return "div";
545 default: vpanic("showARM64FpBinOp");
549 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
550 switch (op) {
551 case ARM64fpu_NEG: return "neg ";
552 case ARM64fpu_ABS: return "abs ";
553 case ARM64fpu_SQRT: return "sqrt ";
554 case ARM64fpu_RINT: return "rinti";
555 case ARM64fpu_RECPX: return "recpx";
556 default: vpanic("showARM64FpUnaryOp");
560 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
561 /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
562 switch (op) {
563 case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
564 case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
565 case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
566 case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
567 case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
568 case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
569 case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
570 case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
571 case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
572 case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
573 case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return;
574 case ARM64vecb_FADD64x2: *nm = "fadd "; *ar = "2d"; return;
575 case ARM64vecb_FSUB64x2: *nm = "fsub "; *ar = "2d"; return;
576 case ARM64vecb_FMUL64x2: *nm = "fmul "; *ar = "2d"; return;
577 case ARM64vecb_FDIV64x2: *nm = "fdiv "; *ar = "2d"; return;
578 case ARM64vecb_FADD32x4: *nm = "fadd "; *ar = "4s"; return;
579 case ARM64vecb_FSUB32x4: *nm = "fsub "; *ar = "4s"; return;
580 case ARM64vecb_FMUL32x4: *nm = "fmul "; *ar = "4s"; return;
581 case ARM64vecb_FDIV32x4: *nm = "fdiv "; *ar = "4s"; return;
582 case ARM64vecb_FMAX64x2: *nm = "fmax "; *ar = "2d"; return;
583 case ARM64vecb_FMAX32x4: *nm = "fmax "; *ar = "4s"; return;
584 case ARM64vecb_FMIN64x2: *nm = "fmin "; *ar = "2d"; return;
585 case ARM64vecb_FMIN32x4: *nm = "fmin "; *ar = "4s"; return;
586 case ARM64vecb_UMAX32x4: *nm = "umax "; *ar = "4s"; return;
587 case ARM64vecb_UMAX16x8: *nm = "umax "; *ar = "8h"; return;
588 case ARM64vecb_UMAX8x16: *nm = "umax "; *ar = "16b"; return;
589 case ARM64vecb_UMIN32x4: *nm = "umin "; *ar = "4s"; return;
590 case ARM64vecb_UMIN16x8: *nm = "umin "; *ar = "8h"; return;
591 case ARM64vecb_UMIN8x16: *nm = "umin "; *ar = "16b"; return;
592 case ARM64vecb_SMAX32x4: *nm = "smax "; *ar = "4s"; return;
593 case ARM64vecb_SMAX16x8: *nm = "smax "; *ar = "8h"; return;
594 case ARM64vecb_SMAX8x16: *nm = "smax "; *ar = "16b"; return;
595 case ARM64vecb_SMIN32x4: *nm = "smin "; *ar = "4s"; return;
596 case ARM64vecb_SMIN16x8: *nm = "smin "; *ar = "8h"; return;
597 case ARM64vecb_SMIN8x16: *nm = "smin "; *ar = "16b"; return;
598 case ARM64vecb_AND: *nm = "and "; *ar = "16b"; return;
599 case ARM64vecb_ORR: *nm = "orr "; *ar = "16b"; return;
600 case ARM64vecb_XOR: *nm = "eor "; *ar = "16b"; return;
601 case ARM64vecb_CMEQ64x2: *nm = "cmeq "; *ar = "2d"; return;
602 case ARM64vecb_CMEQ32x4: *nm = "cmeq "; *ar = "4s"; return;
603 case ARM64vecb_CMEQ16x8: *nm = "cmeq "; *ar = "8h"; return;
604 case ARM64vecb_CMEQ8x16: *nm = "cmeq "; *ar = "16b"; return;
605 case ARM64vecb_CMHI64x2: *nm = "cmhi "; *ar = "2d"; return;
606 case ARM64vecb_CMHI32x4: *nm = "cmhi "; *ar = "4s"; return;
607 case ARM64vecb_CMHI16x8: *nm = "cmhi "; *ar = "8h"; return;
608 case ARM64vecb_CMHI8x16: *nm = "cmhi "; *ar = "16b"; return;
609 case ARM64vecb_CMGT64x2: *nm = "cmgt "; *ar = "2d"; return;
610 case ARM64vecb_CMGT32x4: *nm = "cmgt "; *ar = "4s"; return;
611 case ARM64vecb_CMGT16x8: *nm = "cmgt "; *ar = "8h"; return;
612 case ARM64vecb_CMGT8x16: *nm = "cmgt "; *ar = "16b"; return;
613 case ARM64vecb_FCMEQ64x2: *nm = "fcmeq "; *ar = "2d"; return;
614 case ARM64vecb_FCMEQ32x4: *nm = "fcmeq "; *ar = "4s"; return;
615 case ARM64vecb_FCMGE64x2: *nm = "fcmge "; *ar = "2d"; return;
616 case ARM64vecb_FCMGE32x4: *nm = "fcmge "; *ar = "4s"; return;
617 case ARM64vecb_FCMGT64x2: *nm = "fcmgt "; *ar = "2d"; return;
618 case ARM64vecb_FCMGT32x4: *nm = "fcmgt "; *ar = "4s"; return;
619 case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
620 case ARM64vecb_UZP164x2: *nm = "uzp1 "; *ar = "2d"; return;
621 case ARM64vecb_UZP132x4: *nm = "uzp1 "; *ar = "4s"; return;
622 case ARM64vecb_UZP116x8: *nm = "uzp1 "; *ar = "8h"; return;
623 case ARM64vecb_UZP18x16: *nm = "uzp1 "; *ar = "16b"; return;
624 case ARM64vecb_UZP264x2: *nm = "uzp2 "; *ar = "2d"; return;
625 case ARM64vecb_UZP232x4: *nm = "uzp2 "; *ar = "4s"; return;
626 case ARM64vecb_UZP216x8: *nm = "uzp2 "; *ar = "8h"; return;
627 case ARM64vecb_UZP28x16: *nm = "uzp2 "; *ar = "16b"; return;
628 case ARM64vecb_ZIP132x4: *nm = "zip1 "; *ar = "4s"; return;
629 case ARM64vecb_ZIP116x8: *nm = "zip1 "; *ar = "8h"; return;
630 case ARM64vecb_ZIP18x16: *nm = "zip1 "; *ar = "16b"; return;
631 case ARM64vecb_ZIP232x4: *nm = "zip2 "; *ar = "4s"; return;
632 case ARM64vecb_ZIP216x8: *nm = "zip2 "; *ar = "8h"; return;
633 case ARM64vecb_ZIP28x16: *nm = "zip2 "; *ar = "16b"; return;
634 case ARM64vecb_PMUL8x16: *nm = "pmul "; *ar = "16b"; return;
635 case ARM64vecb_PMULL8x8: *nm = "pmull "; *ar = "8hbb"; return;
636 case ARM64vecb_UMULL2DSS: *nm = "umull "; *ar = "2dss"; return;
637 case ARM64vecb_UMULL4SHH: *nm = "umull "; *ar = "4shh"; return;
638 case ARM64vecb_UMULL8HBB: *nm = "umull "; *ar = "8hbb"; return;
639 case ARM64vecb_SMULL2DSS: *nm = "smull "; *ar = "2dss"; return;
640 case ARM64vecb_SMULL4SHH: *nm = "smull "; *ar = "4shh"; return;
641 case ARM64vecb_SMULL8HBB: *nm = "smull "; *ar = "8hbb"; return;
642 case ARM64vecb_SQADD64x2: *nm = "sqadd "; *ar = "2d"; return;
643 case ARM64vecb_SQADD32x4: *nm = "sqadd "; *ar = "4s"; return;
644 case ARM64vecb_SQADD16x8: *nm = "sqadd "; *ar = "8h"; return;
645 case ARM64vecb_SQADD8x16: *nm = "sqadd "; *ar = "16b"; return;
646 case ARM64vecb_UQADD64x2: *nm = "uqadd "; *ar = "2d"; return;
647 case ARM64vecb_UQADD32x4: *nm = "uqadd "; *ar = "4s"; return;
648 case ARM64vecb_UQADD16x8: *nm = "uqadd "; *ar = "8h"; return;
649 case ARM64vecb_UQADD8x16: *nm = "uqadd "; *ar = "16b"; return;
650 case ARM64vecb_SQSUB64x2: *nm = "sqsub "; *ar = "2d"; return;
651 case ARM64vecb_SQSUB32x4: *nm = "sqsub "; *ar = "4s"; return;
652 case ARM64vecb_SQSUB16x8: *nm = "sqsub "; *ar = "8h"; return;
653 case ARM64vecb_SQSUB8x16: *nm = "sqsub "; *ar = "16b"; return;
654 case ARM64vecb_UQSUB64x2: *nm = "uqsub "; *ar = "2d"; return;
655 case ARM64vecb_UQSUB32x4: *nm = "uqsub "; *ar = "4s"; return;
656 case ARM64vecb_UQSUB16x8: *nm = "uqsub "; *ar = "8h"; return;
657 case ARM64vecb_UQSUB8x16: *nm = "uqsub "; *ar = "16b"; return;
658 case ARM64vecb_SQDMULL2DSS: *nm = "sqdmull"; *ar = "2dss"; return;
659 case ARM64vecb_SQDMULL4SHH: *nm = "sqdmull"; *ar = "4shh"; return;
660 case ARM64vecb_SQDMULH32x4: *nm = "sqdmulh"; *ar = "4s"; return;
661 case ARM64vecb_SQDMULH16x8: *nm = "sqdmulh"; *ar = "8h"; return;
662 case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh"; *ar = "4s"; return;
663 case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh"; *ar = "8h"; return;
664 case ARM64vecb_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
665 case ARM64vecb_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
666 case ARM64vecb_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
667 case ARM64vecb_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
668 case ARM64vecb_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
669 case ARM64vecb_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
670 case ARM64vecb_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
671 case ARM64vecb_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
672 case ARM64vecb_SQRSHL64x2: *nm = "sqrshl"; *ar = "2d"; return;
673 case ARM64vecb_SQRSHL32x4: *nm = "sqrshl"; *ar = "4s"; return;
674 case ARM64vecb_SQRSHL16x8: *nm = "sqrshl"; *ar = "8h"; return;
675 case ARM64vecb_SQRSHL8x16: *nm = "sqrshl"; *ar = "16b"; return;
676 case ARM64vecb_UQRSHL64x2: *nm = "uqrshl"; *ar = "2d"; return;
677 case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
678 case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
679 case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
680 case ARM64vecb_SSHL64x2: *nm = "sshl "; *ar = "2d"; return;
681 case ARM64vecb_SSHL32x4: *nm = "sshl "; *ar = "4s"; return;
682 case ARM64vecb_SSHL16x8: *nm = "sshl "; *ar = "8h"; return;
683 case ARM64vecb_SSHL8x16: *nm = "sshl "; *ar = "16b"; return;
684 case ARM64vecb_USHL64x2: *nm = "ushl "; *ar = "2d"; return;
685 case ARM64vecb_USHL32x4: *nm = "ushl "; *ar = "4s"; return;
686 case ARM64vecb_USHL16x8: *nm = "ushl "; *ar = "8h"; return;
687 case ARM64vecb_USHL8x16: *nm = "ushl "; *ar = "16b"; return;
688 case ARM64vecb_SRSHL64x2: *nm = "srshl "; *ar = "2d"; return;
689 case ARM64vecb_SRSHL32x4: *nm = "srshl "; *ar = "4s"; return;
690 case ARM64vecb_SRSHL16x8: *nm = "srshl "; *ar = "8h"; return;
691 case ARM64vecb_SRSHL8x16: *nm = "srshl "; *ar = "16b"; return;
692 case ARM64vecb_URSHL64x2: *nm = "urshl "; *ar = "2d"; return;
693 case ARM64vecb_URSHL32x4: *nm = "urshl "; *ar = "4s"; return;
694 case ARM64vecb_URSHL16x8: *nm = "urshl "; *ar = "8h"; return;
695 case ARM64vecb_URSHL8x16: *nm = "urshl "; *ar = "16b"; return;
696 case ARM64vecb_FRECPS64x2: *nm = "frecps"; *ar = "2d"; return;
697 case ARM64vecb_FRECPS32x4: *nm = "frecps"; *ar = "4s"; return;
698 case ARM64vecb_FRSQRTS64x2: *nm = "frsqrts"; *ar = "2d"; return;
699 case ARM64vecb_FRSQRTS32x4: *nm = "frsqrts"; *ar = "4s"; return;
700 default: vpanic("showARM64VecBinOp");
704 static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
705 /*OUT*/const HChar** ar,
706 ARM64VecModifyOp op ) {
707 switch (op) {
708 case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d"; return;
709 case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s"; return;
710 case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h"; return;
711 case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
712 case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d"; return;
713 case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s"; return;
714 case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h"; return;
715 case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
716 default: vpanic("showARM64VecModifyOp");
720 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
721 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
723 switch (op) {
724 case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
725 case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
726 case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
727 case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
728 case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
729 case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
730 case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
731 case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
732 case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
733 case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
734 case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
735 case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
736 case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
737 case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
738 case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
739 case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
740 case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
741 case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
742 case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
743 case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
744 case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
745 case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
746 case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
747 case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
748 case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
749 case ARM64vecu_FRECPE64x2: *nm = "frecpe"; *ar = "2d"; return;
750 case ARM64vecu_FRECPE32x4: *nm = "frecpe"; *ar = "4s"; return;
751 case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d"; return;
752 case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s"; return;
753 case ARM64vecu_FSQRT64x2: *nm = "fsqrt"; *ar = "2d"; return;
754 case ARM64vecu_FSQRT32x4: *nm = "fsqrt"; *ar = "4s"; return;
755 default: vpanic("showARM64VecUnaryOp");
759 static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
760 /*OUT*/const HChar** ar,
761 ARM64VecShiftImmOp op )
763 switch (op) {
764 case ARM64vecshi_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
765 case ARM64vecshi_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
766 case ARM64vecshi_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
767 case ARM64vecshi_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
768 case ARM64vecshi_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
769 case ARM64vecshi_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
770 case ARM64vecshi_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
771 case ARM64vecshi_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
772 case ARM64vecshi_SHL64x2: *nm = "shl "; *ar = "2d"; return;
773 case ARM64vecshi_SHL32x4: *nm = "shl "; *ar = "4s"; return;
774 case ARM64vecshi_SHL16x8: *nm = "shl "; *ar = "8h"; return;
775 case ARM64vecshi_SHL8x16: *nm = "shl "; *ar = "16b"; return;
776 case ARM64vecshi_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
777 case ARM64vecshi_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
778 case ARM64vecshi_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
779 case ARM64vecshi_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
780 case ARM64vecshi_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
781 case ARM64vecshi_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
782 case ARM64vecshi_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
783 case ARM64vecshi_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
784 case ARM64vecshi_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
785 case ARM64vecshi_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
786 case ARM64vecshi_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
787 case ARM64vecshi_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
788 case ARM64vecshi_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
789 case ARM64vecshi_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
790 case ARM64vecshi_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
791 case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
792 case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
793 case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
794 case ARM64vecshi_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
795 case ARM64vecshi_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
796 case ARM64vecshi_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
797 case ARM64vecshi_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
798 case ARM64vecshi_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
799 case ARM64vecshi_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
800 case ARM64vecshi_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
801 case ARM64vecshi_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
802 case ARM64vecshi_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
803 case ARM64vecshi_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
804 case ARM64vecshi_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
805 case ARM64vecshi_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
806 default: vpanic("showARM64VecShiftImmOp");
810 static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
811 switch (op) {
812 case ARM64vecna_XTN: return "xtn ";
813 case ARM64vecna_SQXTN: return "sqxtn ";
814 case ARM64vecna_UQXTN: return "uqxtn ";
815 case ARM64vecna_SQXTUN: return "sqxtun";
816 default: vpanic("showARM64VecNarrowOp");
820 ARM64Instr* ARM64Instr_Arith ( HReg dst,
821 HReg argL, ARM64RIA* argR, Bool isAdd ) {
822 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
823 i->tag = ARM64in_Arith;
824 i->ARM64in.Arith.dst = dst;
825 i->ARM64in.Arith.argL = argL;
826 i->ARM64in.Arith.argR = argR;
827 i->ARM64in.Arith.isAdd = isAdd;
828 return i;
830 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
831 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
832 i->tag = ARM64in_Cmp;
833 i->ARM64in.Cmp.argL = argL;
834 i->ARM64in.Cmp.argR = argR;
835 i->ARM64in.Cmp.is64 = is64;
836 return i;
838 ARM64Instr* ARM64Instr_Logic ( HReg dst,
839 HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
840 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
841 i->tag = ARM64in_Logic;
842 i->ARM64in.Logic.dst = dst;
843 i->ARM64in.Logic.argL = argL;
844 i->ARM64in.Logic.argR = argR;
845 i->ARM64in.Logic.op = op;
846 return i;
848 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
849 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
850 i->tag = ARM64in_Test;
851 i->ARM64in.Test.argL = argL;
852 i->ARM64in.Test.argR = argR;
853 return i;
855 ARM64Instr* ARM64Instr_Shift ( HReg dst,
856 HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
857 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
858 i->tag = ARM64in_Shift;
859 i->ARM64in.Shift.dst = dst;
860 i->ARM64in.Shift.argL = argL;
861 i->ARM64in.Shift.argR = argR;
862 i->ARM64in.Shift.op = op;
863 return i;
865 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
866 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
867 i->tag = ARM64in_Unary;
868 i->ARM64in.Unary.dst = dst;
869 i->ARM64in.Unary.src = src;
870 i->ARM64in.Unary.op = op;
871 return i;
873 ARM64Instr* ARM64Instr_Set64 ( HReg dst, ARM64CondCode cond ) {
874 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
875 i->tag = ARM64in_Set64;
876 i->ARM64in.Set64.dst = dst;
877 i->ARM64in.Set64.cond = cond;
878 return i;
880 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
881 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
882 i->tag = ARM64in_MovI;
883 i->ARM64in.MovI.dst = dst;
884 i->ARM64in.MovI.src = src;
885 vassert(hregClass(src) == HRcInt64);
886 vassert(hregClass(dst) == HRcInt64);
887 return i;
889 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
890 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
891 i->tag = ARM64in_Imm64;
892 i->ARM64in.Imm64.dst = dst;
893 i->ARM64in.Imm64.imm64 = imm64;
894 return i;
896 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
897 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
898 i->tag = ARM64in_LdSt64;
899 i->ARM64in.LdSt64.isLoad = isLoad;
900 i->ARM64in.LdSt64.rD = rD;
901 i->ARM64in.LdSt64.amode = amode;
902 return i;
904 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
905 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
906 i->tag = ARM64in_LdSt32;
907 i->ARM64in.LdSt32.isLoad = isLoad;
908 i->ARM64in.LdSt32.rD = rD;
909 i->ARM64in.LdSt32.amode = amode;
910 return i;
912 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
913 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
914 i->tag = ARM64in_LdSt16;
915 i->ARM64in.LdSt16.isLoad = isLoad;
916 i->ARM64in.LdSt16.rD = rD;
917 i->ARM64in.LdSt16.amode = amode;
918 return i;
920 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
921 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
922 i->tag = ARM64in_LdSt8;
923 i->ARM64in.LdSt8.isLoad = isLoad;
924 i->ARM64in.LdSt8.rD = rD;
925 i->ARM64in.LdSt8.amode = amode;
926 return i;
928 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
929 ARM64CondCode cond, Bool toFastEP ) {
930 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
931 i->tag = ARM64in_XDirect;
932 i->ARM64in.XDirect.dstGA = dstGA;
933 i->ARM64in.XDirect.amPC = amPC;
934 i->ARM64in.XDirect.cond = cond;
935 i->ARM64in.XDirect.toFastEP = toFastEP;
936 return i;
938 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
939 ARM64CondCode cond ) {
940 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
941 i->tag = ARM64in_XIndir;
942 i->ARM64in.XIndir.dstGA = dstGA;
943 i->ARM64in.XIndir.amPC = amPC;
944 i->ARM64in.XIndir.cond = cond;
945 return i;
947 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
948 ARM64CondCode cond, IRJumpKind jk ) {
949 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
950 i->tag = ARM64in_XAssisted;
951 i->ARM64in.XAssisted.dstGA = dstGA;
952 i->ARM64in.XAssisted.amPC = amPC;
953 i->ARM64in.XAssisted.cond = cond;
954 i->ARM64in.XAssisted.jk = jk;
955 return i;
957 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
958 ARM64CondCode cond ) {
959 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
960 i->tag = ARM64in_CSel;
961 i->ARM64in.CSel.dst = dst;
962 i->ARM64in.CSel.argL = argL;
963 i->ARM64in.CSel.argR = argR;
964 i->ARM64in.CSel.cond = cond;
965 return i;
967 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
968 RetLoc rloc ) {
969 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
970 i->tag = ARM64in_Call;
971 i->ARM64in.Call.cond = cond;
972 i->ARM64in.Call.target = target;
973 i->ARM64in.Call.nArgRegs = nArgRegs;
974 i->ARM64in.Call.rloc = rloc;
975 vassert(is_sane_RetLoc(rloc));
976 return i;
978 extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
979 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
980 i->tag = ARM64in_AddToSP;
981 i->ARM64in.AddToSP.simm = simm;
982 vassert(-4096 < simm && simm < 4096);
983 vassert(0 == (simm & 0xF));
984 return i;
986 extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
987 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
988 i->tag = ARM64in_FromSP;
989 i->ARM64in.FromSP.dst = dst;
990 return i;
992 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
993 ARM64MulOp op ) {
994 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
995 i->tag = ARM64in_Mul;
996 i->ARM64in.Mul.dst = dst;
997 i->ARM64in.Mul.argL = argL;
998 i->ARM64in.Mul.argR = argR;
999 i->ARM64in.Mul.op = op;
1000 return i;
1002 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
1003 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1004 i->tag = ARM64in_LdrEX;
1005 i->ARM64in.LdrEX.szB = szB;
1006 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1007 return i;
1009 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
1010 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1011 i->tag = ARM64in_StrEX;
1012 i->ARM64in.StrEX.szB = szB;
1013 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1014 return i;
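/* Illustrative (not part of the build): LdrEX and StrEX use fixed
   registers rather than virtual ones.  Going by the printer below, the
   convention is

      LdrEX:  x2 = load-exclusive  [x4]         (w2 for the 32/16/8 bit cases)
      StrEX:  w0 = store-exclusive [x4], x2     (success/failure flag lands in w0)

   so the instruction selector is expected to marshal operands into
   x2/x4 itself.
*/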
1016 ARM64Instr* ARM64Instr_CAS ( Int szB ) {
1017 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1018 i->tag = ARM64in_CAS;
1019 i->ARM64in.CAS.szB = szB;
1020 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1021 return i;
1023 ARM64Instr* ARM64Instr_CASP ( Int szB ) {
1024 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1025 i->tag = ARM64in_CASP;
1026 i->ARM64in.CASP.szB = szB;
1027 vassert(szB == 8 || szB == 4);
1028 return i;
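/* Illustrative (not part of the build): like LdrEX/StrEX, CAS and CASP
   use fixed registers.  Going by the printer below, the intended
   conventions are

      CAS:   x1    = cas(szB*8 bits) (addr=x3, expected=x5 -> new=x7)
      CASP:  x0,x1 = casp(szB*8 bits)(addr=x2, expected=x4,x5 -> new=x6,x7)

   For CASP, szB is the size of each element of the pair (8 or 4), so a
   64-bit pair touches 16 bytes of memory in total.
*/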
1030 ARM64Instr* ARM64Instr_MFence ( void ) {
1031 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1032 i->tag = ARM64in_MFence;
1033 return i;
1035 ARM64Instr* ARM64Instr_ClrEX ( void ) {
1036 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1037 i->tag = ARM64in_ClrEX;
1038 return i;
1040 ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1041 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1042 i->tag = ARM64in_VLdStH;
1043 i->ARM64in.VLdStH.isLoad = isLoad;
1044 i->ARM64in.VLdStH.hD = sD;
1045 i->ARM64in.VLdStH.rN = rN;
1046 i->ARM64in.VLdStH.uimm12 = uimm12;
1047 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
1048 return i;
1050 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1051 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1052 i->tag = ARM64in_VLdStS;
1053 i->ARM64in.VLdStS.isLoad = isLoad;
1054 i->ARM64in.VLdStS.sD = sD;
1055 i->ARM64in.VLdStS.rN = rN;
1056 i->ARM64in.VLdStS.uimm12 = uimm12;
1057 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
1058 return i;
1060 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
1061 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1062 i->tag = ARM64in_VLdStD;
1063 i->ARM64in.VLdStD.isLoad = isLoad;
1064 i->ARM64in.VLdStD.dD = dD;
1065 i->ARM64in.VLdStD.rN = rN;
1066 i->ARM64in.VLdStD.uimm12 = uimm12;
1067 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
1068 return i;
1070 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
1071 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1072 i->tag = ARM64in_VLdStQ;
1073 i->ARM64in.VLdStQ.isLoad = isLoad;
1074 i->ARM64in.VLdStQ.rQ = rQ;
1075 i->ARM64in.VLdStQ.rN = rN;
1076 return i;
1078 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
1079 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1080 i->tag = ARM64in_VCvtI2F;
1081 i->ARM64in.VCvtI2F.how = how;
1082 i->ARM64in.VCvtI2F.rD = rD;
1083 i->ARM64in.VCvtI2F.rS = rS;
1084 return i;
1086 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
1087 UChar armRM ) {
1088 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1089 i->tag = ARM64in_VCvtF2I;
1090 i->ARM64in.VCvtF2I.how = how;
1091 i->ARM64in.VCvtF2I.rD = rD;
1092 i->ARM64in.VCvtF2I.rS = rS;
1093 i->ARM64in.VCvtF2I.armRM = armRM;
1094 vassert(armRM <= 3);
1095 return i;
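/* Illustrative (not part of the build): armRM selects the rounding mode
   baked into the conversion.  Going by the "npmz" mapping in the
   printer below, the intent appears to be

      armRM 0 -> N (to nearest, ties to even)
      armRM 1 -> P (towards +infinity)
      armRM 2 -> M (towards -infinity)
      armRM 3 -> Z (towards zero)

   matching the FCVTNx / FCVTPx / FCVTMx / FCVTZx families.
*/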
1097 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1098 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1099 i->tag = ARM64in_VCvtSD;
1100 i->ARM64in.VCvtSD.sToD = sToD;
1101 i->ARM64in.VCvtSD.dst = dst;
1102 i->ARM64in.VCvtSD.src = src;
1103 return i;
1105 ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
1106 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1107 i->tag = ARM64in_VCvtHS;
1108 i->ARM64in.VCvtHS.hToS = hToS;
1109 i->ARM64in.VCvtHS.dst = dst;
1110 i->ARM64in.VCvtHS.src = src;
1111 return i;
1113 ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
1114 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1115 i->tag = ARM64in_VCvtHD;
1116 i->ARM64in.VCvtHD.hToD = hToD;
1117 i->ARM64in.VCvtHD.dst = dst;
1118 i->ARM64in.VCvtHD.src = src;
1119 return i;
1121 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1122 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1123 i->tag = ARM64in_VUnaryD;
1124 i->ARM64in.VUnaryD.op = op;
1125 i->ARM64in.VUnaryD.dst = dst;
1126 i->ARM64in.VUnaryD.src = src;
1127 return i;
1129 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1130 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1131 i->tag = ARM64in_VUnaryS;
1132 i->ARM64in.VUnaryS.op = op;
1133 i->ARM64in.VUnaryS.dst = dst;
1134 i->ARM64in.VUnaryS.src = src;
1135 return i;
1137 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
1138 HReg dst, HReg argL, HReg argR ) {
1139 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1140 i->tag = ARM64in_VBinD;
1141 i->ARM64in.VBinD.op = op;
1142 i->ARM64in.VBinD.dst = dst;
1143 i->ARM64in.VBinD.argL = argL;
1144 i->ARM64in.VBinD.argR = argR;
1145 return i;
1147 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
1148 HReg dst, HReg argL, HReg argR ) {
1149 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1150 i->tag = ARM64in_VBinS;
1151 i->ARM64in.VBinS.op = op;
1152 i->ARM64in.VBinS.dst = dst;
1153 i->ARM64in.VBinS.argL = argL;
1154 i->ARM64in.VBinS.argR = argR;
1155 return i;
1157 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
1158 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1159 i->tag = ARM64in_VCmpD;
1160 i->ARM64in.VCmpD.argL = argL;
1161 i->ARM64in.VCmpD.argR = argR;
1162 return i;
1164 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
1165 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1166 i->tag = ARM64in_VCmpS;
1167 i->ARM64in.VCmpS.argL = argL;
1168 i->ARM64in.VCmpS.argR = argR;
1169 return i;
1171 ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
1172 ARM64CondCode cond, Bool isD ) {
1173 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1174 i->tag = ARM64in_VFCSel;
1175 i->ARM64in.VFCSel.dst = dst;
1176 i->ARM64in.VFCSel.argL = argL;
1177 i->ARM64in.VFCSel.argR = argR;
1178 i->ARM64in.VFCSel.cond = cond;
1179 i->ARM64in.VFCSel.isD = isD;
1180 return i;
1182 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
1183 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1184 i->tag = ARM64in_FPCR;
1185 i->ARM64in.FPCR.toFPCR = toFPCR;
1186 i->ARM64in.FPCR.iReg = iReg;
1187 return i;
1189 ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
1190 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1191 i->tag = ARM64in_FPSR;
1192 i->ARM64in.FPSR.toFPSR = toFPSR;
1193 i->ARM64in.FPSR.iReg = iReg;
1194 return i;
1196 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
1197 HReg dst, HReg argL, HReg argR ) {
1198 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1199 i->tag = ARM64in_VBinV;
1200 i->ARM64in.VBinV.op = op;
1201 i->ARM64in.VBinV.dst = dst;
1202 i->ARM64in.VBinV.argL = argL;
1203 i->ARM64in.VBinV.argR = argR;
1204 return i;
1206 ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
1207 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1208 i->tag = ARM64in_VModifyV;
1209 i->ARM64in.VModifyV.op = op;
1210 i->ARM64in.VModifyV.mod = mod;
1211 i->ARM64in.VModifyV.arg = arg;
1212 return i;
1214 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
1215 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1216 i->tag = ARM64in_VUnaryV;
1217 i->ARM64in.VUnaryV.op = op;
1218 i->ARM64in.VUnaryV.dst = dst;
1219 i->ARM64in.VUnaryV.arg = arg;
1220 return i;
1222 ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
1223 UInt dszBlg2, HReg dst, HReg src ) {
1224 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1225 i->tag = ARM64in_VNarrowV;
1226 i->ARM64in.VNarrowV.op = op;
1227 i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
1228 i->ARM64in.VNarrowV.dst = dst;
1229 i->ARM64in.VNarrowV.src = src;
1230 vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
1231 return i;
1233 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
1234 HReg dst, HReg src, UInt amt ) {
1235 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1236 i->tag = ARM64in_VShiftImmV;
1237 i->ARM64in.VShiftImmV.op = op;
1238 i->ARM64in.VShiftImmV.dst = dst;
1239 i->ARM64in.VShiftImmV.src = src;
1240 i->ARM64in.VShiftImmV.amt = amt;
1241 UInt minSh = 0;
1242 UInt maxSh = 0;
1243 switch (op) {
1244 /* For right shifts, the allowed shift amounts are 1 .. lane_size.
1245          For left shifts, the allowed shift amounts are 0 .. lane_size-1.  */
1247 case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
1248 case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
1249 case ARM64vecshi_SQSHRUN2SD:
1250 case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
1251 case ARM64vecshi_SQRSHRUN2SD:
1252 minSh = 1; maxSh = 64; break;
1253 case ARM64vecshi_SHL64x2:
1254 case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
1255 case ARM64vecshi_SQSHLU64x2:
1256 minSh = 0; maxSh = 63; break;
1257 case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
1258 case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
1259 case ARM64vecshi_SQSHRUN4HS:
1260 case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
1261 case ARM64vecshi_SQRSHRUN4HS:
1262 minSh = 1; maxSh = 32; break;
1263 case ARM64vecshi_SHL32x4:
1264 case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
1265 case ARM64vecshi_SQSHLU32x4:
1266 minSh = 0; maxSh = 31; break;
1267 case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
1268 case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
1269 case ARM64vecshi_SQSHRUN8BH:
1270 case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
1271 case ARM64vecshi_SQRSHRUN8BH:
1272 minSh = 1; maxSh = 16; break;
1273 case ARM64vecshi_SHL16x8:
1274 case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
1275 case ARM64vecshi_SQSHLU16x8:
1276 minSh = 0; maxSh = 15; break;
1277 case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
1278 minSh = 1; maxSh = 8; break;
1279 case ARM64vecshi_SHL8x16:
1280 case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
1281 case ARM64vecshi_SQSHLU8x16:
1282 minSh = 0; maxSh = 7; break;
1283 default:
1284 vassert(0);
1286 vassert(maxSh > 0);
1287 vassert(amt >= minSh && amt <= maxSh);
1288 return i;
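/* Illustrative (not part of the build): the checks above encode the
   asymmetry between right and left immediate shifts, e.g. for 32-bit
   lanes ('dst' and 'src' being hypothetical V128-class vregs):

      ARM64Instr_VShiftImmV(ARM64vecshi_USHR32x4, dst, src, 32);  // ok: right shift, 1..32
      ARM64Instr_VShiftImmV(ARM64vecshi_SHL32x4,  dst, src, 31);  // ok: left shift, 0..31
      ARM64Instr_VShiftImmV(ARM64vecshi_SHL32x4,  dst, src, 32);  // would fail the assertion
*/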
1290 ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
1291 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1292 i->tag = ARM64in_VExtV;
1293 i->ARM64in.VExtV.dst = dst;
1294 i->ARM64in.VExtV.srcLo = srcLo;
1295 i->ARM64in.VExtV.srcHi = srcHi;
1296 i->ARM64in.VExtV.amtB = amtB;
1297 vassert(amtB >= 1 && amtB <= 15);
1298 return i;
1300 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
1301 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1302 i->tag = ARM64in_VImmQ;
1303 i->ARM64in.VImmQ.rQ = rQ;
1304 i->ARM64in.VImmQ.imm = imm;
1305 /* Check that this is something that can actually be emitted. */
1306 switch (imm) {
1307 case 0x0000: case 0x0001: case 0x0003:
1308 case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
1309 break;
1310 default:
1311 vassert(0);
1313 return i;
1315 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
1316 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1317 i->tag = ARM64in_VDfromX;
1318 i->ARM64in.VDfromX.rD = rD;
1319 i->ARM64in.VDfromX.rX = rX;
1320 return i;
1322 ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
1323 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1324 i->tag = ARM64in_VQfromX;
1325 i->ARM64in.VQfromX.rQ = rQ;
1326 i->ARM64in.VQfromX.rXlo = rXlo;
1327 return i;
1329 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
1330 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1331 i->tag = ARM64in_VQfromXX;
1332 i->ARM64in.VQfromXX.rQ = rQ;
1333 i->ARM64in.VQfromXX.rXhi = rXhi;
1334 i->ARM64in.VQfromXX.rXlo = rXlo;
1335 return i;
1337 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
1338 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1339 i->tag = ARM64in_VXfromQ;
1340 i->ARM64in.VXfromQ.rX = rX;
1341 i->ARM64in.VXfromQ.rQ = rQ;
1342 i->ARM64in.VXfromQ.laneNo = laneNo;
1343 vassert(laneNo <= 1);
1344 return i;
1346 ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
1347 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1348 i->tag = ARM64in_VXfromDorS;
1349 i->ARM64in.VXfromDorS.rX = rX;
1350 i->ARM64in.VXfromDorS.rDorS = rDorS;
1351 i->ARM64in.VXfromDorS.fromD = fromD;
1352 return i;
1354 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
1355 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1356 i->tag = ARM64in_VMov;
1357 i->ARM64in.VMov.szB = szB;
1358 i->ARM64in.VMov.dst = dst;
1359 i->ARM64in.VMov.src = src;
1360 switch (szB) {
1361 case 16:
1362 vassert(hregClass(src) == HRcVec128);
1363 vassert(hregClass(dst) == HRcVec128);
1364 break;
1365 case 8:
1366 vassert(hregClass(src) == HRcFlt64);
1367 vassert(hregClass(dst) == HRcFlt64);
1368 break;
1369 default:
1370 vpanic("ARM64Instr_VMov");
1372 return i;
1374 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
1375 ARM64AMode* amFailAddr ) {
1376 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1377 i->tag = ARM64in_EvCheck;
1378 i->ARM64in.EvCheck.amCounter = amCounter;
1379 i->ARM64in.EvCheck.amFailAddr = amFailAddr;
1380 return i;
1382 ARM64Instr* ARM64Instr_ProfInc ( void ) {
1383 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1384 i->tag = ARM64in_ProfInc;
1385 return i;
1388 /* ... */
1390 void ppARM64Instr ( const ARM64Instr* i ) {
1391 switch (i->tag) {
1392 case ARM64in_Arith:
1393 vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
1394 ppHRegARM64(i->ARM64in.Arith.dst);
1395 vex_printf(", ");
1396 ppHRegARM64(i->ARM64in.Arith.argL);
1397 vex_printf(", ");
1398 ppARM64RIA(i->ARM64in.Arith.argR);
1399 return;
1400 case ARM64in_Cmp:
1401 vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
1402 ppHRegARM64(i->ARM64in.Cmp.argL);
1403 vex_printf(", ");
1404 ppARM64RIA(i->ARM64in.Cmp.argR);
1405 return;
1406 case ARM64in_Logic:
1407 vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
1408 ppHRegARM64(i->ARM64in.Logic.dst);
1409 vex_printf(", ");
1410 ppHRegARM64(i->ARM64in.Logic.argL);
1411 vex_printf(", ");
1412 ppARM64RIL(i->ARM64in.Logic.argR);
1413 return;
1414 case ARM64in_Test:
1415 vex_printf("tst ");
1416 ppHRegARM64(i->ARM64in.Test.argL);
1417 vex_printf(", ");
1418 ppARM64RIL(i->ARM64in.Test.argR);
1419 return;
1420 case ARM64in_Shift:
1421 vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
1422 ppHRegARM64(i->ARM64in.Shift.dst);
1423 vex_printf(", ");
1424 ppHRegARM64(i->ARM64in.Shift.argL);
1425 vex_printf(", ");
1426 ppARM64RI6(i->ARM64in.Shift.argR);
1427 return;
1428 case ARM64in_Unary:
1429 vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
1430 ppHRegARM64(i->ARM64in.Unary.dst);
1431 vex_printf(", ");
1432 ppHRegARM64(i->ARM64in.Unary.src);
1433 return;
1434 case ARM64in_Set64:
1435 vex_printf("cset ");
1436 ppHRegARM64(i->ARM64in.Set64.dst);
1437 vex_printf(", %s", showARM64CondCode(i->ARM64in.Set64.cond));
1438 return;
1439 case ARM64in_MovI:
1440 vex_printf("mov ");
1441 ppHRegARM64(i->ARM64in.MovI.dst);
1442 vex_printf(", ");
1443 ppHRegARM64(i->ARM64in.MovI.src);
1444 return;
1445 case ARM64in_Imm64:
1446 vex_printf("imm64 ");
1447 ppHRegARM64(i->ARM64in.Imm64.dst);
1448 vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
1449 return;
1450 case ARM64in_LdSt64:
1451 if (i->ARM64in.LdSt64.isLoad) {
1452 vex_printf("ldr ");
1453 ppHRegARM64(i->ARM64in.LdSt64.rD);
1454 vex_printf(", ");
1455 ppARM64AMode(i->ARM64in.LdSt64.amode);
1456 } else {
1457 vex_printf("str ");
1458 ppARM64AMode(i->ARM64in.LdSt64.amode);
1459 vex_printf(", ");
1460 ppHRegARM64(i->ARM64in.LdSt64.rD);
1462 return;
1463 case ARM64in_LdSt32:
1464 if (i->ARM64in.LdSt32.isLoad) {
1465 vex_printf("ldruw ");
1466 ppHRegARM64(i->ARM64in.LdSt32.rD);
1467 vex_printf(", ");
1468 ppARM64AMode(i->ARM64in.LdSt32.amode);
1469 } else {
1470 vex_printf("strw ");
1471 ppARM64AMode(i->ARM64in.LdSt32.amode);
1472 vex_printf(", ");
1473 ppHRegARM64(i->ARM64in.LdSt32.rD);
1475 return;
1476 case ARM64in_LdSt16:
1477 if (i->ARM64in.LdSt16.isLoad) {
1478 vex_printf("ldruh ");
1479 ppHRegARM64(i->ARM64in.LdSt16.rD);
1480 vex_printf(", ");
1481 ppARM64AMode(i->ARM64in.LdSt16.amode);
1482 } else {
1483 vex_printf("strh ");
1484 ppARM64AMode(i->ARM64in.LdSt16.amode);
1485 vex_printf(", ");
1486 ppHRegARM64(i->ARM64in.LdSt16.rD);
1488 return;
1489 case ARM64in_LdSt8:
1490 if (i->ARM64in.LdSt8.isLoad) {
1491 vex_printf("ldrub ");
1492 ppHRegARM64(i->ARM64in.LdSt8.rD);
1493 vex_printf(", ");
1494 ppARM64AMode(i->ARM64in.LdSt8.amode);
1495 } else {
1496 vex_printf("strb ");
1497 ppARM64AMode(i->ARM64in.LdSt8.amode);
1498 vex_printf(", ");
1499 ppHRegARM64(i->ARM64in.LdSt8.rD);
1501 return;
1502 case ARM64in_XDirect:
1503 vex_printf("(xDirect) ");
1504 vex_printf("if (%%pstate.%s) { ",
1505 showARM64CondCode(i->ARM64in.XDirect.cond));
1506 vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
1507 vex_printf("str x9,");
1508 ppARM64AMode(i->ARM64in.XDirect.amPC);
1509 vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
1510 i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
1511 vex_printf("blr x9 }");
1512 return;
1513 case ARM64in_XIndir:
1514 vex_printf("(xIndir) ");
1515 vex_printf("if (%%pstate.%s) { ",
1516 showARM64CondCode(i->ARM64in.XIndir.cond));
1517 vex_printf("str ");
1518 ppHRegARM64(i->ARM64in.XIndir.dstGA);
1519 vex_printf(",");
1520 ppARM64AMode(i->ARM64in.XIndir.amPC);
1521 vex_printf("; imm64 x9,$disp_cp_xindir; ");
1522 vex_printf("br x9 }");
1523 return;
1524 case ARM64in_XAssisted:
1525 vex_printf("(xAssisted) ");
1526 vex_printf("if (%%pstate.%s) { ",
1527 showARM64CondCode(i->ARM64in.XAssisted.cond));
1528 vex_printf("str ");
1529 ppHRegARM64(i->ARM64in.XAssisted.dstGA);
1530 vex_printf(",");
1531 ppARM64AMode(i->ARM64in.XAssisted.amPC);
1532 vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
1533 (Int)i->ARM64in.XAssisted.jk);
1534 vex_printf("imm64 x9,$disp_cp_xassisted; ");
1535 vex_printf("br x9 }");
1536 return;
1537 case ARM64in_CSel:
1538 vex_printf("csel ");
1539 ppHRegARM64(i->ARM64in.CSel.dst);
1540 vex_printf(", ");
1541 ppHRegARM64(i->ARM64in.CSel.argL);
1542 vex_printf(", ");
1543 ppHRegARM64(i->ARM64in.CSel.argR);
1544 vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
1545 return;
1546 case ARM64in_Call:
1547 vex_printf("call%s ",
1548 i->ARM64in.Call.cond==ARM64cc_AL
1549 ? " " : showARM64CondCode(i->ARM64in.Call.cond));
1550 vex_printf("0x%llx [nArgRegs=%d, ",
1551 i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
1552 ppRetLoc(i->ARM64in.Call.rloc);
1553 vex_printf("]");
1554 return;
1555 case ARM64in_AddToSP: {
1556 Int simm = i->ARM64in.AddToSP.simm;
1557 vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
1558 simm < 0 ? -simm : simm);
1559 return;
1561 case ARM64in_FromSP:
1562 vex_printf("mov ");
1563 ppHRegARM64(i->ARM64in.FromSP.dst);
1564 vex_printf(", xsp");
1565 return;
1566 case ARM64in_Mul:
1567 vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
1568 ppHRegARM64(i->ARM64in.Mul.dst);
1569 vex_printf(", ");
1570 ppHRegARM64(i->ARM64in.Mul.argL);
1571 vex_printf(", ");
1572 ppHRegARM64(i->ARM64in.Mul.argR);
1573 return;
1575 case ARM64in_LdrEX: {
1576 const HChar* sz = " ";
1577 switch (i->ARM64in.LdrEX.szB) {
1578 case 1: sz = "b"; break;
1579 case 2: sz = "h"; break;
1580 case 4: case 8: break;
1581 default: vassert(0);
1583 vex_printf("ldxr%s %c2, [x4]",
1584 sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
1585 return;
1587 case ARM64in_StrEX: {
1588 const HChar* sz = " ";
1589 switch (i->ARM64in.StrEX.szB) {
1590 case 1: sz = "b"; break;
1591 case 2: sz = "h"; break;
1592 case 4: case 8: break;
1593 default: vassert(0);
1595 vex_printf("stxr%s w0, %c2, [x4]",
1596 sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
1597 return;
1599 case ARM64in_CAS: {
1600 vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
1601 return;
1603 case ARM64in_CASP: {
1604 vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i->ARM64in.CASP.szB);
1605 return;
1607 case ARM64in_MFence:
1608 vex_printf("(mfence) dsb sy; dmb sy; isb");
1609 return;
1610 case ARM64in_ClrEX:
1611 vex_printf("clrex #15");
1612 return;
1613 case ARM64in_VLdStH:
1614 if (i->ARM64in.VLdStH.isLoad) {
1615 vex_printf("ldr ");
1616 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1617 vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
1618 ppHRegARM64(i->ARM64in.VLdStH.rN);
1619 vex_printf(")");
1620 } else {
1621 vex_printf("str ");
1622 vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
1623 ppHRegARM64(i->ARM64in.VLdStH.rN);
1624 vex_printf("), ");
1625 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1627 return;
1628 case ARM64in_VLdStS:
1629 if (i->ARM64in.VLdStS.isLoad) {
1630 vex_printf("ldr ");
1631 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1632 vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
1633 ppHRegARM64(i->ARM64in.VLdStS.rN);
1634 vex_printf(")");
1635 } else {
1636 vex_printf("str ");
1637 vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
1638 ppHRegARM64(i->ARM64in.VLdStS.rN);
1639 vex_printf("), ");
1640 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1642 return;
1643 case ARM64in_VLdStD:
1644 if (i->ARM64in.VLdStD.isLoad) {
1645 vex_printf("ldr ");
1646 ppHRegARM64(i->ARM64in.VLdStD.dD);
1647 vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
1648 ppHRegARM64(i->ARM64in.VLdStD.rN);
1649 vex_printf(")");
1650 } else {
1651 vex_printf("str ");
1652 vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
1653 ppHRegARM64(i->ARM64in.VLdStD.rN);
1654 vex_printf("), ");
1655 ppHRegARM64(i->ARM64in.VLdStD.dD);
1657 return;
1658 case ARM64in_VLdStQ:
1659 if (i->ARM64in.VLdStQ.isLoad)
1660 vex_printf("ld1.2d {");
1661 else
1662 vex_printf("st1.2d {");
1663 ppHRegARM64(i->ARM64in.VLdStQ.rQ);
1664 vex_printf("}, [");
1665 ppHRegARM64(i->ARM64in.VLdStQ.rN);
1666 vex_printf("]");
1667 return;
1668 case ARM64in_VCvtI2F: {
1669 HChar syn = '?';
1670 UInt fszB = 0;
1671 UInt iszB = 0;
1672 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
1673 vex_printf("%ccvtf ", syn);
1674 ppHRegARM64(i->ARM64in.VCvtI2F.rD);
1675 vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
1676 ppHRegARM64(i->ARM64in.VCvtI2F.rS);
1677 vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
1678 return;
1680 case ARM64in_VCvtF2I: {
1681 HChar syn = '?';
1682 UInt fszB = 0;
1683 UInt iszB = 0;
1684 HChar rmo = '?';
1685 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
1686 UChar armRM = i->ARM64in.VCvtF2I.armRM;
1687 if (armRM < 4) rmo = "npmz"[armRM];
1688 vex_printf("fcvt%c%c ", rmo, syn);
1689 ppHRegARM64(i->ARM64in.VCvtF2I.rD);
1690 vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
1691 ppHRegARM64(i->ARM64in.VCvtF2I.rS);
1692 vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
1693 return;
1695 case ARM64in_VCvtSD:
1696 vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
1697 if (i->ARM64in.VCvtSD.sToD) {
1698 ppHRegARM64(i->ARM64in.VCvtSD.dst);
1699 vex_printf(", ");
1700 ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
1701 } else {
1702 ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
1703 vex_printf(", ");
1704 ppHRegARM64(i->ARM64in.VCvtSD.src);
1706 return;
1707 case ARM64in_VCvtHS:
1708 vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
1709 if (i->ARM64in.VCvtHS.hToS) {
1710 ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
1711 vex_printf(", ");
1712 ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
1713 } else {
1714 ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
1715 vex_printf(", ");
1716 ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
1718 return;
1719 case ARM64in_VCvtHD:
1720 vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
1721 if (i->ARM64in.VCvtHD.hToD) {
1722 ppHRegARM64(i->ARM64in.VCvtHD.dst);
1723 vex_printf(", ");
1724 ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
1725 } else {
1726 ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
1727 vex_printf(", ");
1728 ppHRegARM64(i->ARM64in.VCvtHD.src);
1730 return;
1731 case ARM64in_VUnaryD:
1732 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
1733 ppHRegARM64(i->ARM64in.VUnaryD.dst);
1734 vex_printf(", ");
1735 ppHRegARM64(i->ARM64in.VUnaryD.src);
1736 return;
1737 case ARM64in_VUnaryS:
1738 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
1739 ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
1740 vex_printf(", ");
1741 ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
1742 return;
1743 case ARM64in_VBinD:
1744 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
1745 ppHRegARM64(i->ARM64in.VBinD.dst);
1746 vex_printf(", ");
1747 ppHRegARM64(i->ARM64in.VBinD.argL);
1748 vex_printf(", ");
1749 ppHRegARM64(i->ARM64in.VBinD.argR);
1750 return;
1751 case ARM64in_VBinS:
1752 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
1753 ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
1754 vex_printf(", ");
1755 ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
1756 vex_printf(", ");
1757 ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
1758 return;
1759 case ARM64in_VCmpD:
1760 vex_printf("fcmp ");
1761 ppHRegARM64(i->ARM64in.VCmpD.argL);
1762 vex_printf(", ");
1763 ppHRegARM64(i->ARM64in.VCmpD.argR);
1764 return;
1765 case ARM64in_VCmpS:
1766 vex_printf("fcmp ");
1767 ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
1768 vex_printf(", ");
1769 ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
1770 return;
1771 case ARM64in_VFCSel: {
1772 UInt (*ppHRegARM64fp)(HReg)
1773 = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
1774 vex_printf("fcsel ");
1775 ppHRegARM64fp(i->ARM64in.VFCSel.dst);
1776 vex_printf(", ");
1777 ppHRegARM64fp(i->ARM64in.VFCSel.argL);
1778 vex_printf(", ");
1779 ppHRegARM64fp(i->ARM64in.VFCSel.argR);
1780 vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
1781 return;
1783 case ARM64in_FPCR:
1784 if (i->ARM64in.FPCR.toFPCR) {
1785 vex_printf("msr fpcr, ");
1786 ppHRegARM64(i->ARM64in.FPCR.iReg);
1787 } else {
1788 vex_printf("mrs ");
1789 ppHRegARM64(i->ARM64in.FPCR.iReg);
1790 vex_printf(", fpcr");
1792 return;
1793 case ARM64in_FPSR:
1794 if (i->ARM64in.FPSR.toFPSR) {
1795 vex_printf("msr fpsr, ");
1796 ppHRegARM64(i->ARM64in.FPSR.iReg);
1797 } else {
1798 vex_printf("mrs ");
1799 ppHRegARM64(i->ARM64in.FPSR.iReg);
1800 vex_printf(", fpsr");
1802 return;
1803 case ARM64in_VBinV: {
1804 const HChar* nm = "??";
1805 const HChar* ar = "??";
1806 showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
1807 vex_printf("%s ", nm);
1808 ppHRegARM64(i->ARM64in.VBinV.dst);
1809 vex_printf(".%s, ", ar);
1810 ppHRegARM64(i->ARM64in.VBinV.argL);
1811 vex_printf(".%s, ", ar);
1812 ppHRegARM64(i->ARM64in.VBinV.argR);
1813 vex_printf(".%s", ar);
1814 return;
1816 case ARM64in_VModifyV: {
1817 const HChar* nm = "??";
1818 const HChar* ar = "??";
1819 showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
1820 vex_printf("%s ", nm);
1821 ppHRegARM64(i->ARM64in.VModifyV.mod);
1822 vex_printf(".%s, ", ar);
1823 ppHRegARM64(i->ARM64in.VModifyV.arg);
1824 vex_printf(".%s", ar);
1825 return;
1827 case ARM64in_VUnaryV: {
1828 const HChar* nm = "??";
1829 const HChar* ar = "??";
1830 showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
1831 vex_printf("%s ", nm);
1832 ppHRegARM64(i->ARM64in.VUnaryV.dst);
1833 vex_printf(".%s, ", ar);
1834 ppHRegARM64(i->ARM64in.VUnaryV.arg);
1835 vex_printf(".%s", ar);
1836 return;
1838 case ARM64in_VNarrowV: {
1839 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
1840 const HChar* darr[3] = { "8b", "4h", "2s" };
1841 const HChar* sarr[3] = { "8h", "4s", "2d" };
1842 const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
1843 vex_printf("%s ", nm);
1844 ppHRegARM64(i->ARM64in.VNarrowV.dst);
1845 vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
1846 ppHRegARM64(i->ARM64in.VNarrowV.src);
1847 vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
1848 return;
1850 case ARM64in_VShiftImmV: {
1851 const HChar* nm = "??";
1852 const HChar* ar = "??";
1853 showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
1854 vex_printf("%s ", nm);
1855 ppHRegARM64(i->ARM64in.VShiftImmV.dst);
1856 vex_printf(".%s, ", ar);
1857 ppHRegARM64(i->ARM64in.VShiftImmV.src);
1858 vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
1859 return;
1861 case ARM64in_VExtV: {
1862 vex_printf("ext ");
1863 ppHRegARM64(i->ARM64in.VExtV.dst);
1864 vex_printf(".16b, ");
1865 ppHRegARM64(i->ARM64in.VExtV.srcLo);
1866 vex_printf(".16b, ");
1867 ppHRegARM64(i->ARM64in.VExtV.srcHi);
1868 vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
1869 return;
1871 case ARM64in_VImmQ:
1872 vex_printf("qimm ");
1873 ppHRegARM64(i->ARM64in.VImmQ.rQ);
1874 vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
1875 return;
1876 case ARM64in_VDfromX:
1877 vex_printf("fmov ");
1878 ppHRegARM64(i->ARM64in.VDfromX.rD);
1879 vex_printf(", ");
1880 ppHRegARM64(i->ARM64in.VDfromX.rX);
1881 return;
1882 case ARM64in_VQfromX:
1883 vex_printf("fmov ");
1884 ppHRegARM64(i->ARM64in.VQfromX.rQ);
1885 vex_printf(".d[0], ");
1886 ppHRegARM64(i->ARM64in.VQfromX.rXlo);
1887 return;
1888 case ARM64in_VQfromXX:
1889 vex_printf("qFromXX ");
1890 ppHRegARM64(i->ARM64in.VQfromXX.rQ);
1891 vex_printf(", ");
1892 ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
1893 vex_printf(", ");
1894 ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
1895 return;
1896 case ARM64in_VXfromQ:
1897 vex_printf("fmov ");
1898 ppHRegARM64(i->ARM64in.VXfromQ.rX);
1899 vex_printf(", ");
1900 ppHRegARM64(i->ARM64in.VXfromQ.rQ);
1901 vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
1902 return;
1903 case ARM64in_VXfromDorS:
1904 vex_printf("fmov ");
1905 ppHRegARM64(i->ARM64in.VXfromDorS.rX);
1906 vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
1907 ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
1908 vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
1909 return;
1910 case ARM64in_VMov: {
1911 UChar aux = '?';
1912 switch (i->ARM64in.VMov.szB) {
1913 case 16: aux = 'q'; break;
1914 case 8: aux = 'd'; break;
1915 case 4: aux = 's'; break;
1916 default: break;
1918 vex_printf("mov(%c) ", aux);
1919 ppHRegARM64(i->ARM64in.VMov.dst);
1920 vex_printf(", ");
1921 ppHRegARM64(i->ARM64in.VMov.src);
1922 return;
1924 case ARM64in_EvCheck:
1925 vex_printf("(evCheck) ldr w9,");
1926 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1927 vex_printf("; subs w9,w9,$1; str w9,");
1928 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1929 vex_printf("; bpl nofail; ldr x9,");
1930 ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
1931 vex_printf("; br x9; nofail:");
1932 return;
1933 case ARM64in_ProfInc:
1934 vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
1935 "ldr x8,[x9]; add x8,x8,#1, str x8,[x9]");
1936 return;
1937 default:
1938 vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
1939 vpanic("ppARM64Instr(1)");
1940 return;
1945 /* --------- Helpers for register allocation. --------- */
1947 void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
1949 vassert(mode64 == True);
1950 initHRegUsage(u);
1951 switch (i->tag) {
1952 case ARM64in_Arith:
1953 addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
1954 addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
1955 addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
1956 return;
1957 case ARM64in_Cmp:
1958 addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
1959 addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
1960 return;
1961 case ARM64in_Logic:
1962 addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
1963 addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
1964 addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
1965 return;
1966 case ARM64in_Test:
1967 addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
1968 addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
1969 return;
1970 case ARM64in_Shift:
1971 addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
1972 addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
1973 addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
1974 return;
1975 case ARM64in_Unary:
1976 addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
1977 addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
1978 return;
1979 case ARM64in_Set64:
1980 addHRegUse(u, HRmWrite, i->ARM64in.Set64.dst);
1981 return;
1982 case ARM64in_MovI:
1983 addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
1984 addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
1985 u->isRegRegMove = True;
1986 u->regMoveSrc = i->ARM64in.MovI.src;
1987 u->regMoveDst = i->ARM64in.MovI.dst;
1988 return;
1989 case ARM64in_Imm64:
1990 addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
1991 return;
1992 case ARM64in_LdSt64:
1993 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
1994 if (i->ARM64in.LdSt64.isLoad) {
1995 addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
1996 } else {
1997 addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
1999 return;
2000 case ARM64in_LdSt32:
2001 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
2002 if (i->ARM64in.LdSt32.isLoad) {
2003 addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
2004 } else {
2005 addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
2007 return;
2008 case ARM64in_LdSt16:
2009 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
2010 if (i->ARM64in.LdSt16.isLoad) {
2011 addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
2012 } else {
2013 addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
2015 return;
2016 case ARM64in_LdSt8:
2017 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
2018 if (i->ARM64in.LdSt8.isLoad) {
2019 addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
2020 } else {
2021 addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
2023 return;
2024 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2025 conditionally exit the block. Hence we only need to list (1)
2026 the registers that they read, and (2) the registers that they
2027 write in the case where the block is not exited. (2) is
2028 empty, hence only (1) is relevant here. */
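      /* As a concrete illustration: for ARM64in_XIndir below, only
         dstGA and whatever registers amPC mentions are listed.  The x9
         scratch that ends up holding $disp_cp_xindir (see the
         ppARM64Instr case above) is hardwired and not under the
         allocator's control, so there is nothing to state for it. */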
2029 case ARM64in_XDirect:
2030 addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
2031 return;
2032 case ARM64in_XIndir:
2033 addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
2034 addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
2035 return;
2036 case ARM64in_XAssisted:
2037 addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
2038 addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
2039 return;
2040 case ARM64in_CSel:
2041 addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
2042 addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
2043 addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
2044 return;
2045 case ARM64in_Call:
2046 /* logic and comments copied/modified from x86 back end */
2047 /* This is a bit subtle. */
2048 /* First off, claim it trashes all the caller-saved regs
2049 which fall within the register allocator's jurisdiction.
2050 These I believe to be x0 to x7 and the 128-bit vector
2051 registers in use, q16 .. q20. */
2052 addHRegUse(u, HRmWrite, hregARM64_X0());
2053 addHRegUse(u, HRmWrite, hregARM64_X1());
2054 addHRegUse(u, HRmWrite, hregARM64_X2());
2055 addHRegUse(u, HRmWrite, hregARM64_X3());
2056 addHRegUse(u, HRmWrite, hregARM64_X4());
2057 addHRegUse(u, HRmWrite, hregARM64_X5());
2058 addHRegUse(u, HRmWrite, hregARM64_X6());
2059 addHRegUse(u, HRmWrite, hregARM64_X7());
2060 addHRegUse(u, HRmWrite, hregARM64_Q16());
2061 addHRegUse(u, HRmWrite, hregARM64_Q17());
2062 addHRegUse(u, HRmWrite, hregARM64_Q18());
2063 addHRegUse(u, HRmWrite, hregARM64_Q19());
2064 addHRegUse(u, HRmWrite, hregARM64_Q20());
2065 /* Now we have to state any parameter-carrying registers
2066 which might be read. This depends on nArgRegs. */
2067 switch (i->ARM64in.Call.nArgRegs) {
2068 case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
2069 case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
2070 case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
2071 case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
2072 case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
2073 case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
2074 case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
2075 case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
2076 case 0: break;
2077 default: vpanic("getRegUsage_ARM64:Call:regparms");
2079 /* Finally, there is the issue that the insn trashes a
2080 register because the literal target address has to be
2081 loaded into a register. However, we reserve x9 for that
2082 purpose so there's no further complexity here. Stating x9
2083 as trashed is pointless since it's not under the control
2084 of the allocator, but what the hell. */
2085 addHRegUse(u, HRmWrite, hregARM64_X9());
2086 return;
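      /* Illustratively, a helper call with nArgRegs == 2 is therefore
         treated as reading x0 and x1 and as clobbering x0..x7,
         q16..q20 and the x9 used for the target address; registers not
         listed here are assumed to survive the call. */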
2087 case ARM64in_AddToSP:
2088 /* Only changes SP, but regalloc doesn't control that, hence
2089 we don't care. */
2090 return;
2091 case ARM64in_FromSP:
2092 addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
2093 return;
2094 case ARM64in_Mul:
2095 addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
2096 addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
2097 addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
2098 return;
2099 case ARM64in_LdrEX:
2100 addHRegUse(u, HRmRead, hregARM64_X4());
2101 addHRegUse(u, HRmWrite, hregARM64_X2());
2102 return;
2103 case ARM64in_StrEX:
2104 addHRegUse(u, HRmRead, hregARM64_X4());
2105 addHRegUse(u, HRmWrite, hregARM64_X0());
2106 addHRegUse(u, HRmRead, hregARM64_X2());
2107 return;
2108 case ARM64in_CAS:
2109 addHRegUse(u, HRmRead, hregARM64_X3());
2110 addHRegUse(u, HRmRead, hregARM64_X5());
2111 addHRegUse(u, HRmRead, hregARM64_X7());
2112 addHRegUse(u, HRmWrite, hregARM64_X1());
2113 /* Pointless to state this since X8 is not available to RA. */
2114 addHRegUse(u, HRmWrite, hregARM64_X8());
2115 break;
2116 case ARM64in_CASP:
2117 addHRegUse(u, HRmRead, hregARM64_X2());
2118 addHRegUse(u, HRmRead, hregARM64_X4());
2119 addHRegUse(u, HRmRead, hregARM64_X5());
2120 addHRegUse(u, HRmRead, hregARM64_X6());
2121 addHRegUse(u, HRmRead, hregARM64_X7());
2122 addHRegUse(u, HRmWrite, hregARM64_X0());
2123 addHRegUse(u, HRmWrite, hregARM64_X1());
2124 addHRegUse(u, HRmWrite, hregARM64_X9());
2125 addHRegUse(u, HRmWrite, hregARM64_X8());
2126 break;
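      /* These fixed assignments mirror the convention shown by the
         ppARM64Instr cases above: for CASP, x2 carries the address,
         x4/x5 the expected pair and x6/x7 the new pair, with the old
         memory contents coming back in x0/x1; x8 and x9 are
         additionally written (as scratch) by the emitted sequence. */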
2127 case ARM64in_MFence:
2128 return;
2129 case ARM64in_ClrEX:
2130 return;
2131 case ARM64in_VLdStH:
2132 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
2133 if (i->ARM64in.VLdStH.isLoad) {
2134 addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
2135 } else {
2136 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
2138 return;
2139 case ARM64in_VLdStS:
2140 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
2141 if (i->ARM64in.VLdStS.isLoad) {
2142 addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
2143 } else {
2144 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
2146 return;
2147 case ARM64in_VLdStD:
2148 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
2149 if (i->ARM64in.VLdStD.isLoad) {
2150 addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
2151 } else {
2152 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
2154 return;
2155 case ARM64in_VLdStQ:
2156 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
2157 if (i->ARM64in.VLdStQ.isLoad)
2158 addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
2159 else
2160 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
2161 return;
2162 case ARM64in_VCvtI2F:
2163 addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
2164 addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
2165 return;
2166 case ARM64in_VCvtF2I:
2167 addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
2168 addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
2169 return;
2170 case ARM64in_VCvtSD:
2171 addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
2172 addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
2173 return;
2174 case ARM64in_VCvtHS:
2175 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
2176 addHRegUse(u, HRmRead, i->ARM64in.VCvtHS.src);
2177 return;
2178 case ARM64in_VCvtHD:
2179 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
2180 addHRegUse(u, HRmRead, i->ARM64in.VCvtHD.src);
2181 return;
2182 case ARM64in_VUnaryD:
2183 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
2184 addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
2185 return;
2186 case ARM64in_VUnaryS:
2187 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
2188 addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
2189 return;
2190 case ARM64in_VBinD:
2191 addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
2192 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
2193 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
2194 return;
2195 case ARM64in_VBinS:
2196 addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
2197 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
2198 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
2199 return;
2200 case ARM64in_VCmpD:
2201 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
2202 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
2203 return;
2204 case ARM64in_VCmpS:
2205 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
2206 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
2207 return;
2208 case ARM64in_VFCSel:
2209 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
2210 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
2211 addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
2212 return;
2213 case ARM64in_FPCR:
2214 if (i->ARM64in.FPCR.toFPCR)
2215 addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
2216 else
2217 addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
2218 return;
2219 case ARM64in_FPSR:
2220 if (i->ARM64in.FPSR.toFPSR)
2221 addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
2222 else
2223 addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
2224 return;
2225 case ARM64in_VBinV:
2226 addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
2227 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
2228 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
2229 return;
2230 case ARM64in_VModifyV:
2231 addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
2232 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
2233 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
2234 return;
2235 case ARM64in_VUnaryV:
2236 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
2237 addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
2238 return;
2239 case ARM64in_VNarrowV:
2240 addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
2241 addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
2242 return;
2243 case ARM64in_VShiftImmV:
2244 addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
2245 addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
2246 return;
2247 case ARM64in_VExtV:
2248 addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
2249 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
2250 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
2251 return;
2252 case ARM64in_VImmQ:
2253 addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
2254 return;
2255 case ARM64in_VDfromX:
2256 addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
2257 addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
2258 return;
2259 case ARM64in_VQfromX:
2260 addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
2261 addHRegUse(u, HRmRead, i->ARM64in.VQfromX.rXlo);
2262 return;
2263 case ARM64in_VQfromXX:
2264 addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
2265 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
2266 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
2267 return;
2268 case ARM64in_VXfromQ:
2269 addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
2270 addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
2271 return;
2272 case ARM64in_VXfromDorS:
2273 addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
2274 addHRegUse(u, HRmRead, i->ARM64in.VXfromDorS.rDorS);
2275 return;
2276 case ARM64in_VMov:
2277 addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
2278 addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
2279 u->isRegRegMove = True;
2280 u->regMoveSrc = i->ARM64in.VMov.src;
2281 u->regMoveDst = i->ARM64in.VMov.dst;
2282 return;
2283 case ARM64in_EvCheck:
2284 /* We expect both amodes only to mention x21, so this is in
2285 fact pointless, since x21 isn't allocatable, but
2286 anyway.. */
2287 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
2288 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
2289 addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
2290 return;
2291 case ARM64in_ProfInc:
2292 /* Again, pointless to actually state these since neither
2293 is available to RA. */
2294 addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
2295 addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
2296 return;
2297 default:
2298 ppARM64Instr(i);
2299 vpanic("getRegUsage_ARM64Instr");
2304 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
2306 vassert(mode64 == True);
2307 switch (i->tag) {
2308 case ARM64in_Arith:
2309 i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
2310 i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
2311 mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
2312 return;
2313 case ARM64in_Cmp:
2314 i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
2315 mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
2316 return;
2317 case ARM64in_Logic:
2318 i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
2319 i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
2320 mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2321 return;
2322 case ARM64in_Test:
2323 i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
2324          mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
2325 return;
2326 case ARM64in_Shift:
2327 i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
2328 i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
2329 mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
2330 return;
2331 case ARM64in_Unary:
2332 i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
2333 i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
2334 return;
2335 case ARM64in_Set64:
2336 i->ARM64in.Set64.dst = lookupHRegRemap(m, i->ARM64in.Set64.dst);
2337 return;
2338 case ARM64in_MovI:
2339 i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
2340 i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
2341 return;
2342 case ARM64in_Imm64:
2343 i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
2344 return;
2345 case ARM64in_LdSt64:
2346 i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
2347 mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
2348 return;
2349 case ARM64in_LdSt32:
2350 i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
2351 mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
2352 return;
2353 case ARM64in_LdSt16:
2354 i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
2355 mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
2356 return;
2357 case ARM64in_LdSt8:
2358 i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
2359 mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
2360 return;
2361 case ARM64in_XDirect:
2362 mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
2363 return;
2364 case ARM64in_XIndir:
2365 i->ARM64in.XIndir.dstGA
2366 = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
2367 mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
2368 return;
2369 case ARM64in_XAssisted:
2370 i->ARM64in.XAssisted.dstGA
2371 = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
2372 mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
2373 return;
2374 case ARM64in_CSel:
2375 i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst);
2376 i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
2377 i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
2378 return;
2379 case ARM64in_Call:
2380 return;
2381 case ARM64in_AddToSP:
2382 return;
2383 case ARM64in_FromSP:
2384 i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
2385 return;
2386 case ARM64in_Mul:
2387 i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst);
2388 i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
2389 i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
2390 break;
2391 case ARM64in_LdrEX:
2392 return;
2393 case ARM64in_StrEX:
2394 return;
2395 case ARM64in_CAS:
2396 return;
2397 case ARM64in_CASP:
2398 return;
2399 case ARM64in_MFence:
2400 return;
2401 case ARM64in_ClrEX:
2402 return;
2403 case ARM64in_VLdStH:
2404 i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
2405 i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
2406 return;
2407 case ARM64in_VLdStS:
2408 i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
2409 i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
2410 return;
2411 case ARM64in_VLdStD:
2412 i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
2413 i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
2414 return;
2415 case ARM64in_VLdStQ:
2416 i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
2417 i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
2418 return;
2419 case ARM64in_VCvtI2F:
2420 i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
2421 i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
2422 return;
2423 case ARM64in_VCvtF2I:
2424 i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
2425 i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
2426 return;
2427 case ARM64in_VCvtSD:
2428 i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
2429 i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
2430 return;
2431 case ARM64in_VCvtHS:
2432 i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
2433 i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
2434 return;
2435 case ARM64in_VCvtHD:
2436 i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
2437 i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
2438 return;
2439 case ARM64in_VUnaryD:
2440 i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
2441 i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
2442 return;
2443 case ARM64in_VUnaryS:
2444 i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
2445 i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
2446 return;
2447 case ARM64in_VBinD:
2448 i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
2449 i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
2450 i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
2451 return;
2452 case ARM64in_VBinS:
2453 i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
2454 i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
2455 i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
2456 return;
2457 case ARM64in_VCmpD:
2458 i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
2459 i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
2460 return;
2461 case ARM64in_VCmpS:
2462 i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
2463 i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
2464 return;
2465 case ARM64in_VFCSel:
2466 i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
2467 i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
2468 i->ARM64in.VFCSel.dst = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
2469 return;
2470 case ARM64in_FPCR:
2471 i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
2472 return;
2473 case ARM64in_FPSR:
2474 i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
2475 return;
2476 case ARM64in_VBinV:
2477 i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
2478 i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
2479 i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
2480 return;
2481 case ARM64in_VModifyV:
2482 i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
2483 i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
2484 return;
2485 case ARM64in_VUnaryV:
2486 i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
2487 i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
2488 return;
2489 case ARM64in_VNarrowV:
2490 i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
2491 i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
2492 return;
2493 case ARM64in_VShiftImmV:
2494 i->ARM64in.VShiftImmV.dst
2495 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
2496 i->ARM64in.VShiftImmV.src
2497 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
2498 return;
2499 case ARM64in_VExtV:
2500 i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
2501 i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
2502 i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
2503 return;
2504 case ARM64in_VImmQ:
2505 i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
2506 return;
2507 case ARM64in_VDfromX:
2508 i->ARM64in.VDfromX.rD
2509 = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
2510 i->ARM64in.VDfromX.rX
2511 = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
2512 return;
2513 case ARM64in_VQfromX:
2514 i->ARM64in.VQfromX.rQ
2515 = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
2516 i->ARM64in.VQfromX.rXlo
2517 = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
2518 return;
2519 case ARM64in_VQfromXX:
2520 i->ARM64in.VQfromXX.rQ
2521 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
2522 i->ARM64in.VQfromXX.rXhi
2523 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
2524 i->ARM64in.VQfromXX.rXlo
2525 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
2526 return;
2527 case ARM64in_VXfromQ:
2528 i->ARM64in.VXfromQ.rX
2529 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
2530 i->ARM64in.VXfromQ.rQ
2531 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
2532 return;
2533 case ARM64in_VXfromDorS:
2534 i->ARM64in.VXfromDorS.rX
2535 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
2536 i->ARM64in.VXfromDorS.rDorS
2537 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
2538 return;
2539 case ARM64in_VMov:
2540 i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
2541 i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
2542 return;
2543 case ARM64in_EvCheck:
2544 /* We expect both amodes only to mention x21, so this is in
2545 fact pointless, since x21 isn't allocatable, but
2546 anyway.. */
2547 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
2548 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
2549 return;
2550 case ARM64in_ProfInc:
2551 /* hardwires x8 and x9 -- nothing to modify. */
2552 return;
2553 default:
2554 ppARM64Instr(i);
2555 vpanic("mapRegs_ARM64Instr");
2559 /* Generate ARM64 spill/reload instructions under the direction of the
2560 register allocator. Note it's critical these don't write the
2561 condition codes. */
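/* For instance (illustrative offset), spilling an HRcInt64 vreg to
   baseblock offset 784 becomes a single "str <reg>, [x21, #784]",
   whereas an HRcVec128 spill takes two instructions -- "add x9, x21,
   #784" followed by "st1.2d {<reg>}, [x9]" -- because ARM64Instr_VLdStQ
   only addresses through a plain base register.  Neither form writes
   the condition codes. */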
2563 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2564 HReg rreg, Int offsetB, Bool mode64 )
2566 HRegClass rclass;
2567 vassert(offsetB >= 0);
2568 vassert(!hregIsVirtual(rreg));
2569 vassert(mode64 == True);
2570 *i1 = *i2 = NULL;
2571 rclass = hregClass(rreg);
2572 switch (rclass) {
2573 case HRcInt64:
2574 vassert(0 == (offsetB & 7));
2575 offsetB >>= 3;
2576 vassert(offsetB < 4096);
2577 *i1 = ARM64Instr_LdSt64(
2578 False/*!isLoad*/,
2579 rreg,
2580 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2582 return;
2583 case HRcFlt64:
2584 vassert(0 == (offsetB & 7));
2585 vassert(offsetB >= 0 && offsetB < 32768);
2586 *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
2587 rreg, hregARM64_X21(), offsetB);
2588 return;
2589 case HRcVec128: {
2590 HReg x21 = hregARM64_X21(); // baseblock
2591 HReg x9 = hregARM64_X9(); // spill temporary
2592 vassert(0 == (offsetB & 15)); // check sane alignment
2593 vassert(offsetB < 4096);
2594 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2595 *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
2596 return;
2598 default:
2599 ppHRegClass(rclass);
2600 vpanic("genSpill_ARM: unimplemented regclass");
2604 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2605 HReg rreg, Int offsetB, Bool mode64 )
2607 HRegClass rclass;
2608 vassert(offsetB >= 0);
2609 vassert(!hregIsVirtual(rreg));
2610 vassert(mode64 == True);
2611 *i1 = *i2 = NULL;
2612 rclass = hregClass(rreg);
2613 switch (rclass) {
2614 case HRcInt64:
2615 vassert(0 == (offsetB & 7));
2616 offsetB >>= 3;
2617 vassert(offsetB < 4096);
2618 *i1 = ARM64Instr_LdSt64(
2619 True/*isLoad*/,
2620 rreg,
2621 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2623 return;
2624 case HRcFlt64:
2625 vassert(0 == (offsetB & 7));
2626 vassert(offsetB >= 0 && offsetB < 32768);
2627 *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
2628 rreg, hregARM64_X21(), offsetB);
2629 return;
2630 case HRcVec128: {
2631 HReg x21 = hregARM64_X21(); // baseblock
2632 HReg x9 = hregARM64_X9(); // spill temporary
2633 vassert(0 == (offsetB & 15)); // check sane alignment
2634 vassert(offsetB < 4096);
2635 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2636 *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
2637 return;
2639 default:
2640 ppHRegClass(rclass);
2641 vpanic("genReload_ARM: unimplemented regclass");
2645 ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64)
2647 switch (hregClass(from)) {
2648 case HRcInt64:
2649 return ARM64Instr_MovI(to, from);
2650 case HRcFlt64:
2651 return ARM64Instr_VMov(8, to, from);
2652 case HRcVec128:
2653 return ARM64Instr_VMov(16, to, from);
2654 default:
2655 ppHRegClass(hregClass(from));
2656 vpanic("genMove_ARM64: unimplemented regclass");
2661 /* Emit an instruction into buf and return the number of bytes used.
2662 Note that buf is not the insn's final place, and therefore it is
2663 imperative to emit position-independent code. */
2665 static inline UInt iregEnc ( HReg r )
2667 UInt n;
2668 vassert(hregClass(r) == HRcInt64);
2669 vassert(!hregIsVirtual(r));
2670 n = hregEncoding(r);
2671 vassert(n <= 30);
2672 return n;
2675 static inline UInt dregEnc ( HReg r )
2677 UInt n;
2678 vassert(hregClass(r) == HRcFlt64);
2679 vassert(!hregIsVirtual(r));
2680 n = hregEncoding(r);
2681 vassert(n <= 31);
2682 return n;
2685 static inline UInt qregEnc ( HReg r )
2687 UInt n;
2688 vassert(hregClass(r) == HRcVec128);
2689 vassert(!hregIsVirtual(r));
2690 n = hregEncoding(r);
2691 vassert(n <= 31);
2692 return n;
2695 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2696 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2698 #define X00 BITS4(0,0, 0,0)
2699 #define X01 BITS4(0,0, 0,1)
2700 #define X10 BITS4(0,0, 1,0)
2701 #define X11 BITS4(0,0, 1,1)
2703 #define X000 BITS4(0, 0,0,0)
2704 #define X001 BITS4(0, 0,0,1)
2705 #define X010 BITS4(0, 0,1,0)
2706 #define X011 BITS4(0, 0,1,1)
2707 #define X100 BITS4(0, 1,0,0)
2708 #define X101 BITS4(0, 1,0,1)
2709 #define X110 BITS4(0, 1,1,0)
2710 #define X111 BITS4(0, 1,1,1)
2712 #define X0000 BITS4(0,0,0,0)
2713 #define X0001 BITS4(0,0,0,1)
2714 #define X0010 BITS4(0,0,1,0)
2715 #define X0011 BITS4(0,0,1,1)
2717 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
2718 ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
2720 #define X00000 BITS8(0,0,0, 0,0,0,0,0)
2721 #define X00001 BITS8(0,0,0, 0,0,0,0,1)
2722 #define X00110 BITS8(0,0,0, 0,0,1,1,0)
2723 #define X00111 BITS8(0,0,0, 0,0,1,1,1)
2724 #define X01000 BITS8(0,0,0, 0,1,0,0,0)
2725 #define X10000 BITS8(0,0,0, 1,0,0,0,0)
2726 #define X11000 BITS8(0,0,0, 1,1,0,0,0)
2727 #define X11110 BITS8(0,0,0, 1,1,1,1,0)
2728 #define X11111 BITS8(0,0,0, 1,1,1,1,1)
2730 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
2731 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
2732 #define X000010 BITS8(0,0, 0,0,0,0,1,0)
2733 #define X000011 BITS8(0,0, 0,0,0,0,1,1)
2734 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
2735 #define X000110 BITS8(0,0, 0,0,0,1,1,0)
2736 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
2737 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
2738 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
2739 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
2740 #define X001011 BITS8(0,0, 0,0,1,0,1,1)
2741 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
2742 #define X001110 BITS8(0,0, 0,0,1,1,1,0)
2743 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
2744 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
2745 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
2746 #define X010010 BITS8(0,0, 0,1,0,0,1,0)
2747 #define X010011 BITS8(0,0, 0,1,0,0,1,1)
2748 #define X010101 BITS8(0,0, 0,1,0,1,0,1)
2749 #define X010110 BITS8(0,0, 0,1,0,1,1,0)
2750 #define X010111 BITS8(0,0, 0,1,0,1,1,1)
2751 #define X011001 BITS8(0,0, 0,1,1,0,0,1)
2752 #define X011010 BITS8(0,0, 0,1,1,0,1,0)
2753 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
2754 #define X011101 BITS8(0,0, 0,1,1,1,0,1)
2755 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
2756 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
2757 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
2758 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
2759 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
2760 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
2761 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
2762 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
2763 #define X101101 BITS8(0,0, 1,0,1,1,0,1)
2764 #define X101110 BITS8(0,0, 1,0,1,1,1,0)
2765 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
2766 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
2767 #define X110010 BITS8(0,0, 1,1,0,0,1,0)
2768 #define X110100 BITS8(0,0, 1,1,0,1,0,0)
2769 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
2770 #define X110110 BITS8(0,0, 1,1,0,1,1,0)
2771 #define X110111 BITS8(0,0, 1,1,0,1,1,1)
2772 #define X111000 BITS8(0,0, 1,1,1,0,0,0)
2773 #define X111001 BITS8(0,0, 1,1,1,0,0,1)
2774 #define X111101 BITS8(0,0, 1,1,1,1,0,1)
2775 #define X111110 BITS8(0,0, 1,1,1,1,1,0)
2776 #define X111111 BITS8(0,0, 1,1,1,1,1,1)
2778 #define X0001000 BITS8(0, 0,0,0,1,0,0,0)
2779 #define X0010000 BITS8(0, 0,0,1,0,0,0,0)
2780 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
2781 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)
2783 #define X00100000 BITS8(0,0,1,0,0,0,0,0)
2784 #define X00100001 BITS8(0,0,1,0,0,0,0,1)
2785 #define X00100010 BITS8(0,0,1,0,0,0,1,0)
2786 #define X00100011 BITS8(0,0,1,0,0,0,1,1)
2787 #define X01010000 BITS8(0,1,0,1,0,0,0,0)
2788 #define X01010001 BITS8(0,1,0,1,0,0,0,1)
2789 #define X01010100 BITS8(0,1,0,1,0,1,0,0)
2790 #define X01011000 BITS8(0,1,0,1,1,0,0,0)
2791 #define X01100000 BITS8(0,1,1,0,0,0,0,0)
2792 #define X01100001 BITS8(0,1,1,0,0,0,0,1)
2793 #define X01100010 BITS8(0,1,1,0,0,0,1,0)
2794 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
2795 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
2796 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
2797 #define X01110010 BITS8(0,1,1,1,0,0,1,0)
2798 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
2799 #define X01110100 BITS8(0,1,1,1,0,1,0,0)
2800 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
2801 #define X01110110 BITS8(0,1,1,1,0,1,1,0)
2802 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
2803 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
2804 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
2805 #define X11010100 BITS8(1,1,0,1,0,1,0,0)
2806 #define X11010110 BITS8(1,1,0,1,0,1,1,0)
2807 #define X11011000 BITS8(1,1,0,1,1,0,0,0)
2808 #define X11011010 BITS8(1,1,0,1,1,0,1,0)
2809 #define X11011110 BITS8(1,1,0,1,1,1,1,0)
2810 #define X11100010 BITS8(1,1,1,0,0,0,1,0)
2811 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
2812 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
2813 #define X11110101 BITS8(1,1,1,1,0,1,0,1)
2814 #define X11110111 BITS8(1,1,1,1,0,1,1,1)
2817 /* --- 4 fields --- */
2819 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
2820 vassert(8+19+1+4 == 32);
2821 vassert(f1 < (1<<8));
2822 vassert(f2 < (1<<19));
2823 vassert(f3 < (1<<1));
2824 vassert(f4 < (1<<4));
2825 UInt w = 0;
2826 w = (w << 8) | f1;
2827 w = (w << 19) | f2;
2828 w = (w << 1) | f3;
2829 w = (w << 4) | f4;
2830 return w;
2833 /* --- 5 fields --- */
2835 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
2836 UInt f3, UInt f4, UInt f5 ) {
2837 vassert(3+6+2+16+5 == 32);
2838 vassert(f1 < (1<<3));
2839 vassert(f2 < (1<<6));
2840 vassert(f3 < (1<<2));
2841 vassert(f4 < (1<<16));
2842 vassert(f5 < (1<<5));
2843 UInt w = 0;
2844 w = (w << 3) | f1;
2845 w = (w << 6) | f2;
2846 w = (w << 2) | f3;
2847 w = (w << 16) | f4;
2848 w = (w << 5) | f5;
2849 return w;
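/* Illustrative check of the field packing: X_3_6_2_16_5(X110, X100101,
   X00, 1, 0) is 0b110_100101_00_0000000000000001_00000 = 0xD2800020,
   which is the encoding of "movz x0, #1" as emitted by imm64_to_ireg
   below. */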
2852 /* --- 6 fields --- */
2854 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
2855 UInt f4, UInt f5, UInt f6 ) {
2856 vassert(2+6+2+12+5+5 == 32);
2857 vassert(f1 < (1<<2));
2858 vassert(f2 < (1<<6));
2859 vassert(f3 < (1<<2));
2860 vassert(f4 < (1<<12));
2861 vassert(f5 < (1<<5));
2862 vassert(f6 < (1<<5));
2863 UInt w = 0;
2864 w = (w << 2) | f1;
2865 w = (w << 6) | f2;
2866 w = (w << 2) | f3;
2867 w = (w << 12) | f4;
2868 w = (w << 5) | f5;
2869 w = (w << 5) | f6;
2870 return w;
2873 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
2874 UInt f4, UInt f5, UInt f6 ) {
2875 vassert(3+8+5+6+5+5 == 32);
2876 vassert(f1 < (1<<3));
2877 vassert(f2 < (1<<8));
2878 vassert(f3 < (1<<5));
2879 vassert(f4 < (1<<6));
2880 vassert(f5 < (1<<5));
2881 vassert(f6 < (1<<5));
2882 UInt w = 0;
2883 w = (w << 3) | f1;
2884 w = (w << 8) | f2;
2885 w = (w << 5) | f3;
2886 w = (w << 6) | f4;
2887 w = (w << 5) | f5;
2888 w = (w << 5) | f6;
2889 return w;
2892 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
2893 UInt f4, UInt f5, UInt f6 ) {
2894 vassert(3+8+5+6+5+5 == 32);
2895 vassert(f1 < (1<<3));
2896 vassert(f2 < (1<<5));
2897 vassert(f3 < (1<<8));
2898 vassert(f4 < (1<<6));
2899 vassert(f5 < (1<<5));
2900 vassert(f6 < (1<<5));
2901 UInt w = 0;
2902 w = (w << 3) | f1;
2903 w = (w << 5) | f2;
2904 w = (w << 8) | f3;
2905 w = (w << 6) | f4;
2906 w = (w << 5) | f5;
2907 w = (w << 5) | f6;
2908 return w;
2911 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
2912 UInt f4, UInt f5, UInt f6 ) {
2913 vassert(3+6+7+6+5+5 == 32);
2914 vassert(f1 < (1<<3));
2915 vassert(f2 < (1<<6));
2916 vassert(f3 < (1<<7));
2917 vassert(f4 < (1<<6));
2918 vassert(f5 < (1<<5));
2919 vassert(f6 < (1<<5));
2920 UInt w = 0;
2921 w = (w << 3) | f1;
2922 w = (w << 6) | f2;
2923 w = (w << 7) | f3;
2924 w = (w << 6) | f4;
2925 w = (w << 5) | f5;
2926 w = (w << 5) | f6;
2927 return w;
2930 /* --- 7 fields --- */
2932 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
2933 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2934 vassert(2+6+3+9+2+5+5 == 32);
2935 vassert(f1 < (1<<2));
2936 vassert(f2 < (1<<6));
2937 vassert(f3 < (1<<3));
2938 vassert(f4 < (1<<9));
2939 vassert(f5 < (1<<2));
2940 vassert(f6 < (1<<5));
2941 vassert(f7 < (1<<5));
2942 UInt w = 0;
2943 w = (w << 2) | f1;
2944 w = (w << 6) | f2;
2945 w = (w << 3) | f3;
2946 w = (w << 9) | f4;
2947 w = (w << 2) | f5;
2948 w = (w << 5) | f6;
2949 w = (w << 5) | f7;
2950 return w;
2953 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
2954 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2955 vassert(3+6+1+6+6+5+5 == 32);
2956 vassert(f1 < (1<<3));
2957 vassert(f2 < (1<<6));
2958 vassert(f3 < (1<<1));
2959 vassert(f4 < (1<<6));
2960 vassert(f5 < (1<<6));
2961 vassert(f6 < (1<<5));
2962 vassert(f7 < (1<<5));
2963 UInt w = 0;
2964 w = (w << 3) | f1;
2965 w = (w << 6) | f2;
2966 w = (w << 1) | f3;
2967 w = (w << 6) | f4;
2968 w = (w << 6) | f5;
2969 w = (w << 5) | f6;
2970 w = (w << 5) | f7;
2971 return w;
2975 //ZZ #define X0000 BITS4(0,0,0,0)
2976 //ZZ #define X0001 BITS4(0,0,0,1)
2977 //ZZ #define X0010 BITS4(0,0,1,0)
2978 //ZZ #define X0011 BITS4(0,0,1,1)
2979 //ZZ #define X0100 BITS4(0,1,0,0)
2980 //ZZ #define X0101 BITS4(0,1,0,1)
2981 //ZZ #define X0110 BITS4(0,1,1,0)
2982 //ZZ #define X0111 BITS4(0,1,1,1)
2983 //ZZ #define X1000 BITS4(1,0,0,0)
2984 //ZZ #define X1001 BITS4(1,0,0,1)
2985 //ZZ #define X1010 BITS4(1,0,1,0)
2986 //ZZ #define X1011 BITS4(1,0,1,1)
2987 //ZZ #define X1100 BITS4(1,1,0,0)
2988 //ZZ #define X1101 BITS4(1,1,0,1)
2989 //ZZ #define X1110 BITS4(1,1,1,0)
2990 //ZZ #define X1111 BITS4(1,1,1,1)
2992 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2993 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2994 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2995 (((zzx3) & 0xF) << 12))
2997 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2998 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2999 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3000 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
3002 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
3003 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3004 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3005 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
3007 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
3008 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3009 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
3010 (((zzx0) & 0xF) << 0))
3012 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
3013 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3014 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3015 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
3016 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
3018 #define XX______(zzx7,zzx6) \
3019 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
3023 /* Get an immediate into a register, using only that register. */
3024 static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
3026 if (imm64 == 0) {
3027 // This has to be special-cased, since the logic below
3028 // will leave the register unchanged in this case.
3029 // MOVZ xD, #0, LSL #0
3030 *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
3031 return p;
3034 // There must be at least one non-zero halfword. Find the
3035 // lowest nonzero such, and use MOVZ to install it and zero
3036 // out the rest of the register.
3037 UShort h[4];
3038 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3039 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3040 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3041 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3043 UInt i;
3044 for (i = 0; i < 4; i++) {
3045 if (h[i] != 0)
3046 break;
3048 vassert(i < 4);
3050 // MOVZ xD, h[i], LSL (16*i)
3051 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3053 // Work on upwards through h[i], using MOVK to stuff in any
3054 // remaining nonzero elements.
3055 i++;
3056 for (; i < 4; i++) {
3057 if (h[i] == 0)
3058 continue;
3059 // MOVK xD, h[i], LSL (16*i)
3060 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3063 return p;
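/* Worked example with an arbitrary value: for imm64 ==
   0x0000444400002222 the loop above emits just
      movz xD, #0x2222, lsl #0
      movk xD, #0x4444, lsl #32
   -- only nonzero halfwords are materialised, so the sequence is
   anywhere from 1 to 4 instructions long. */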
3066 /* Get an immediate into a register, using only that register, and
3067 generating exactly 4 instructions, regardless of the value of the
3068 immediate. This is used when generating sections of code that need
3069 to be patched later, so as to guarantee a specific size. */
3070 static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3072 UShort h[4];
3073 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3074 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3075 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3076 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3077 // Work on upwards through h[i], using MOVK to stuff in the
3078 // remaining elements.
3079 UInt i;
3080 for (i = 0; i < 4; i++) {
3081 if (i == 0) {
3082 // MOVZ xD, h[0], LSL (16*0)
3083 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3084 } else {
3085 // MOVK xD, h[i], LSL (16*i)
3086 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3089 return p;
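/* Unlike imm64_to_ireg, this always emits MOVZ followed by three MOVKs
   (16 bytes), even when some halfwords are zero -- e.g. imm64 == 0
   still becomes "movz xD,#0; movk xD,#0,lsl#16; movk xD,#0,lsl#32;
   movk xD,#0,lsl#48" -- so that the patching code which later rewrites
   the constant can rely on a fixed-size sequence. */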
3092 /* Check whether p points at a 4-insn sequence cooked up by
3093 imm64_to_ireg_EXACTLY4(). */
3094 static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3096 UShort h[4];
3097 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3098 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3099 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3100 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3101 // Work on upwards through h[i], using MOVK to stuff in the
3102 // remaining elements.
3103 UInt i;
3104 for (i = 0; i < 4; i++) {
3105 UInt expected;
3106 if (i == 0) {
3107 // MOVZ xD, h[0], LSL (16*0)
3108 expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3109 } else {
3110 // MOVK xD, h[i], LSL (16*i)
3111 expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3113 if (p[i] != expected)
3114 return False;
3116 return True;
3120 /* Generate an 8 bit store or 8-to-64 unsigned widening load from/to
3121 rD, using the given amode for the address. */
3122 static UInt* do_load_or_store8 ( UInt* p,
3123 Bool isLoad, UInt wD, ARM64AMode* am )
3125 vassert(wD <= 30);
3126 if (am->tag == ARM64am_RI9) {
3127 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3128 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3130 Int simm9 = am->ARM64am.RI9.simm9;
3131 vassert(-256 <= simm9 && simm9 <= 255);
3132 UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
3133 simm9 & 0x1FF, X00,
3134 iregEnc(am->ARM64am.RI9.reg), wD);
3135 *p++ = instr;
3136 return p;
3138 if (am->tag == ARM64am_RI12) {
3139 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3140 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3142 UInt uimm12 = am->ARM64am.RI12.uimm12;
3143 UInt scale = am->ARM64am.RI12.szB;
3144 vassert(scale == 1); /* failure of this is serious. Do not ignore. */
3145 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3146 vassert(xN <= 30);
3147 UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
3148 uimm12, xN, wD);
3149 *p++ = instr;
3150 return p;
3152 if (am->tag == ARM64am_RR) {
3153       /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3154          LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3156 UInt xN = iregEnc(am->ARM64am.RR.base);
3157 UInt xM = iregEnc(am->ARM64am.RR.index);
3158 vassert(xN <= 30);
3159 UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
3160 xM, X011010, xN, wD);
3161 *p++ = instr;
3162 return p;
3164 vpanic("do_load_or_store8");
3165 vassert(0);
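/* Example (illustrative operands): an 8-bit load through an RI12 amode
   with base x21 and uimm12 == 40 becomes "ldrb wD, [x21, #40]", which
   zero-extends the loaded byte into the full 64-bit xD. */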
3169 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3170 rD, using the given amode for the address. */
3171 static UInt* do_load_or_store16 ( UInt* p,
3172 Bool isLoad, UInt wD, ARM64AMode* am )
3174 vassert(wD <= 30);
3175 if (am->tag == ARM64am_RI9) {
3176 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3177 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3179 Int simm9 = am->ARM64am.RI9.simm9;
3180 vassert(-256 <= simm9 && simm9 <= 255);
3181 UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
3182 simm9 & 0x1FF, X00,
3183 iregEnc(am->ARM64am.RI9.reg), wD);
3184 *p++ = instr;
3185 return p;
3187 if (am->tag == ARM64am_RI12) {
3188 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3189 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3191 UInt uimm12 = am->ARM64am.RI12.uimm12;
3192 UInt scale = am->ARM64am.RI12.szB;
3193 vassert(scale == 2); /* failure of this is serious. Do not ignore. */
3194 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3195 vassert(xN <= 30);
3196 UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
3197 uimm12, xN, wD);
3198 *p++ = instr;
3199 return p;
3201 if (am->tag == ARM64am_RR) {
3202       /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3203          LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3205 UInt xN = iregEnc(am->ARM64am.RR.base);
3206 UInt xM = iregEnc(am->ARM64am.RR.index);
3207 vassert(xN <= 30);
3208 UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
3209 xM, X011010, xN, wD);
3210 *p++ = instr;
3211 return p;
3213 vpanic("do_load_or_store16");
3214 vassert(0);
3218 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3219 rD, using the given amode for the address. */
3220 static UInt* do_load_or_store32 ( UInt* p,
3221 Bool isLoad, UInt wD, ARM64AMode* am )
3223 vassert(wD <= 30);
3224 if (am->tag == ARM64am_RI9) {
3225 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3226 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3228 Int simm9 = am->ARM64am.RI9.simm9;
3229 vassert(-256 <= simm9 && simm9 <= 255);
3230 UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
3231 simm9 & 0x1FF, X00,
3232 iregEnc(am->ARM64am.RI9.reg), wD);
3233 *p++ = instr;
3234 return p;
3236 if (am->tag == ARM64am_RI12) {
3237 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3238 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3240 UInt uimm12 = am->ARM64am.RI12.uimm12;
3241 UInt scale = am->ARM64am.RI12.szB;
3242 vassert(scale == 4); /* failure of this is serious. Do not ignore. */
3243 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3244 vassert(xN <= 30);
3245 UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
3246 uimm12, xN, wD);
3247 *p++ = instr;
3248 return p;
3250 if (am->tag == ARM64am_RR) {
3251 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3252 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3254 UInt xN = iregEnc(am->ARM64am.RR.base);
3255 UInt xM = iregEnc(am->ARM64am.RR.index);
3256 vassert(xN <= 30);
3257 UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
3258 xM, X011010, xN, wD);
3259 *p++ = instr;
3260 return p;
3262 vpanic("do_load_or_store32");
3263 vassert(0);
3267 /* Generate a 64 bit load or store to/from xD, using the given amode
3268 for the address. */
3269 static UInt* do_load_or_store64 ( UInt* p,
3270 Bool isLoad, UInt xD, ARM64AMode* am )
3272 /* In all these cases, Rn can't be 31 since that means SP. */
3273 vassert(xD <= 30);
3274 if (am->tag == ARM64am_RI9) {
3275 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3276 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3278 Int simm9 = am->ARM64am.RI9.simm9;
3279 vassert(-256 <= simm9 && simm9 <= 255);
3280 UInt xN = iregEnc(am->ARM64am.RI9.reg);
3281 vassert(xN <= 30);
3282 UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
3283 simm9 & 0x1FF, X00, xN, xD);
3284 *p++ = instr;
3285 return p;
3287 if (am->tag == ARM64am_RI12) {
3288 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3289 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3291 UInt uimm12 = am->ARM64am.RI12.uimm12;
3292 UInt scale = am->ARM64am.RI12.szB;
3293 vassert(scale == 8); /* failure of this is serious. Do not ignore. */
3294 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3295 vassert(xN <= 30);
3296 UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
3297 uimm12, xN, xD);
3298 *p++ = instr;
3299 return p;
3301 if (am->tag == ARM64am_RR) {
3302 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3303 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3305 UInt xN = iregEnc(am->ARM64am.RR.base);
3306 UInt xM = iregEnc(am->ARM64am.RR.index);
3307 vassert(xN <= 30);
3308 UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
3309 xM, X011010, xN, xD);
3310 *p++ = instr;
3311 return p;
3313 vpanic("do_load_or_store64");
3314 vassert(0);
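/* Worked example for the helper above (a sanity check, not emitted
   anywhere): storing x5 at [x21, #-8] takes the RI9 path and should
   assemble to X_2_6_3_9_2_5_5(X11, X111000, X000, 0x1F8, X00, 21, 5)
   = 0xF81F82A5, i.e. STUR x5, [x21, #-8].  A positive offset that is
   a multiple of 8 (say #16) would use the RI12 form instead, whose
   imm12 field holds the offset already divided by 8. */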
3318 /* Emit an instruction into buf and return the number of bytes used.
3319 Note that buf is not the insn's final place, and therefore it is
3320 imperative to emit position-independent code. If the emitted
3321 instruction was a profiler inc, set *is_profInc to True, else
3322 leave it unchanged. */
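/* Illustrative use (a sketch only -- the real caller is VEX's generic
   translation driver, and the dispatcher pointer names here are just
   placeholders):

      UChar buf[64];
      Bool  isProfInc = False;
      Int   used = emit_ARM64Instr(&isProfInc, buf, sizeof(buf), insn,
                                   True, VexEndnessLE,
                                   chain_slowEP, chain_fastEP,
                                   xindir, xassisted);

   'used' bytes of buf are then copied into the output area; buf itself
   is only a staging buffer, hence the position-independence requirement
   stated above. */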
3324 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
3325 UChar* buf, Int nbuf, const ARM64Instr* i,
3326 Bool mode64, VexEndness endness_host,
3327 const void* disp_cp_chain_me_to_slowEP,
3328 const void* disp_cp_chain_me_to_fastEP,
3329 const void* disp_cp_xindir,
3330 const void* disp_cp_xassisted )
3332 UInt* p = (UInt*)buf;
3333 vassert(nbuf >= 32);
3334 vassert(mode64 == True);
3335 vassert(0 == (((HWord)buf) & 3));
3337 switch (i->tag) {
3338 case ARM64in_Arith: {
3339 UInt rD = iregEnc(i->ARM64in.Arith.dst);
3340 UInt rN = iregEnc(i->ARM64in.Arith.argL);
3341 ARM64RIA* argR = i->ARM64in.Arith.argR;
3342 switch (argR->tag) {
3343 case ARM64riA_I12:
3344 *p++ = X_2_6_2_12_5_5(
3345 i->ARM64in.Arith.isAdd ? X10 : X11,
3346 X010001,
3347 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3348 argR->ARM64riA.I12.imm12, rN, rD
3350 break;
3351 case ARM64riA_R: {
3352 UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
3353 *p++ = X_3_8_5_6_5_5(
3354 i->ARM64in.Arith.isAdd ? X100 : X110,
3355 X01011000, rM, X000000, rN, rD
3357 break;
3359 default:
3360 goto bad;
3362 goto done;
3364 case ARM64in_Cmp: {
3365 UInt rD = 31; /* XZR, we are going to dump the result */
3366 UInt rN = iregEnc(i->ARM64in.Cmp.argL);
3367 ARM64RIA* argR = i->ARM64in.Cmp.argR;
3368 Bool is64 = i->ARM64in.Cmp.is64;
3369 switch (argR->tag) {
3370 case ARM64riA_I12:
3371 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3372 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3373 *p++ = X_2_6_2_12_5_5(
3374 is64 ? X11 : X01, X110001,
3375 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3376 argR->ARM64riA.I12.imm12, rN, rD);
3377 break;
3378 case ARM64riA_R: {
3379 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3380 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3381 UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
3382 *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
3383 X01011000, rM, X000000, rN, rD);
3384 break;
3386 default:
3387 goto bad;
3389 goto done;
3391 case ARM64in_Logic: {
3392 UInt rD = iregEnc(i->ARM64in.Logic.dst);
3393 UInt rN = iregEnc(i->ARM64in.Logic.argL);
3394 ARM64RIL* argR = i->ARM64in.Logic.argR;
3395 UInt opc = 0; /* invalid */
3396 vassert(rD < 31);
3397 vassert(rN < 31);
3398 switch (i->ARM64in.Logic.op) {
3399 case ARM64lo_OR: opc = X101; break;
3400 case ARM64lo_AND: opc = X100; break;
3401 case ARM64lo_XOR: opc = X110; break;
3402 default: break;
3404 vassert(opc != 0);
3405 switch (argR->tag) {
3406 case ARM64riL_I13: {
3407 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3408 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3409 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3410 *p++ = X_3_6_1_6_6_5_5(
3411 opc, X100100, argR->ARM64riL.I13.bitN,
3412 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3413 rN, rD
3415 break;
3417 case ARM64riL_R: {
3418 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3419 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3420 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3421 UInt rM = iregEnc(argR->ARM64riL.R.reg);
3422 vassert(rM < 31);
3423 *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
3424 break;
3426 default:
3427 goto bad;
3429 goto done;
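/* Example of the I13 (bitmask immediate) form, assuming the constant
   has already been encoded into bitN/immR/immS by the instruction
   selector: AND x8, x5, #0xFF is N=1, immR=0, immS=7 (eight
   consecutive ones in a 64-bit element), so with opc=X100 this becomes
   X_3_6_1_6_6_5_5(X100, X100100, 1, 0, 7, 5, 8) = 0x92401CA8, the same
   word that appears hard-wired in the ARM64in_CAS case below. */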
3431 case ARM64in_Test: {
3432 UInt rD = 31; /* XZR, we are going to dump the result */
3433 UInt rN = iregEnc(i->ARM64in.Test.argL);
3434 ARM64RIL* argR = i->ARM64in.Test.argR;
3435 switch (argR->tag) {
3436 case ARM64riL_I13: {
3437 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3438 *p++ = X_3_6_1_6_6_5_5(
3439 X111, X100100, argR->ARM64riL.I13.bitN,
3440 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3441 rN, rD
3443 break;
3445 default:
3446 goto bad;
3448 goto done;
3450 case ARM64in_Shift: {
3451 UInt rD = iregEnc(i->ARM64in.Shift.dst);
3452 UInt rN = iregEnc(i->ARM64in.Shift.argL);
3453 ARM64RI6* argR = i->ARM64in.Shift.argR;
3454 vassert(rD < 31);
3455 vassert(rN < 31);
3456 switch (argR->tag) {
3457 case ARM64ri6_I6: {
3458 /* 110 1001101 (64-sh) (63-sh) nn dd LSL Xd, Xn, sh */
3459 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
3460 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
3461 UInt sh = argR->ARM64ri6.I6.imm6;
3462 vassert(sh > 0 && sh < 64);
3463 switch (i->ARM64in.Shift.op) {
3464 case ARM64sh_SHL:
3465 *p++ = X_3_6_1_6_6_5_5(X110, X100110,
3466 1, 64-sh, 63-sh, rN, rD);
3467 break;
3468 case ARM64sh_SHR:
3469 *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
3470 break;
3471 case ARM64sh_SAR:
3472 *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
3473 break;
3474 default:
3475 vassert(0);
3477 break;
3479 case ARM64ri6_R: {
3480 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
3481 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
3482 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
3483 UInt rM = iregEnc(argR->ARM64ri6.R.reg);
3484 vassert(rM < 31);
3485 UInt subOpc = 0;
3486 switch (i->ARM64in.Shift.op) {
3487 case ARM64sh_SHL: subOpc = X001000; break;
3488 case ARM64sh_SHR: subOpc = X001001; break;
3489 case ARM64sh_SAR: subOpc = X001010; break;
3490 default: vassert(0);
3492 *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
3493 break;
3495 default:
3496 vassert(0);
3498 goto done;
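/* The immediate shifts are UBFM/SBFM aliases: LSL Xd, Xn, #sh is
   UBFM Xd, Xn, #(64-sh), #(63-sh), while LSR and ASR are
   UBFM/SBFM Xd, Xn, #sh, #63.  That is why the I6 case passes
   (64-sh, 63-sh) for SHL but (sh, 63) for SHR/SAR; e.g. sh == 8 gives
   immR=56, immS=55 for LSL and immR=8, immS=63 for LSR. */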
3500 case ARM64in_Unary: {
3501 UInt rDst = iregEnc(i->ARM64in.Unary.dst);
3502 UInt rSrc = iregEnc(i->ARM64in.Unary.src);
3503 switch (i->ARM64in.Unary.op) {
3504 case ARM64un_CLZ:
3505 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
3506 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
3507 *p++ = X_3_8_5_6_5_5(X110,
3508 X11010110, X00000, X000100, rSrc, rDst);
3509 goto done;
3510 case ARM64un_NEG:
3511 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
3512 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
3513 *p++ = X_3_8_5_6_5_5(X110,
3514 X01011000, rSrc, X000000, X11111, rDst);
3515 goto done;
3516 case ARM64un_NOT: {
3517 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
3518 *p++ = X_3_8_5_6_5_5(X101,
3519 X01010001, rSrc, X000000, X11111, rDst);
3520 goto done;
3522 default:
3523 break;
3525 goto bad;
3527 case ARM64in_Set64: {
3528 /* 1 00 1101 0100 11111 invert(cond) 01 11111 Rd CSET Rd, Cond */
3529 UInt rDst = iregEnc(i->ARM64in.Set64.dst);
3530 UInt cc = (UInt)i->ARM64in.Set64.cond;
3531 vassert(cc < 14);
3532 *p++ = X_3_8_5_6_5_5(X100, X11010100, X11111,
3533 ((cc ^ 1) << 2) | X01, X11111, rDst);
3534 goto done;
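/* The cc ^ 1 trick works because, for every condition other than
   AL/NV, flipping the low bit of an ARM64 condition number yields its
   logical inverse (EQ<->NE, CS<->CC, and so on).  CSET Rd, cond is an
   alias of CSINC Rd, XZR, XZR, invert(cond), which is the form built
   here. */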
3536 case ARM64in_MovI: {
3537 /* We generate the "preferred form", ORR Xd, XZR, Xm
3538 101 01010 00 0 m 000000 11111 d
3540 UInt instr = 0xAA0003E0;
3541 UInt d = iregEnc(i->ARM64in.MovI.dst);
3542 UInt m = iregEnc(i->ARM64in.MovI.src);
3543 *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
3544 goto done;
3546 case ARM64in_Imm64: {
3547 p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
3548 i->ARM64in.Imm64.imm64 );
3549 goto done;
3551 case ARM64in_LdSt64: {
3552 p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
3553 iregEnc(i->ARM64in.LdSt64.rD),
3554 i->ARM64in.LdSt64.amode );
3555 goto done;
3557 case ARM64in_LdSt32: {
3558 p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
3559 iregEnc(i->ARM64in.LdSt32.rD),
3560 i->ARM64in.LdSt32.amode );
3561 goto done;
3563 case ARM64in_LdSt16: {
3564 p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
3565 iregEnc(i->ARM64in.LdSt16.rD),
3566 i->ARM64in.LdSt16.amode );
3567 goto done;
3569 case ARM64in_LdSt8: {
3570 p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
3571 iregEnc(i->ARM64in.LdSt8.rD),
3572 i->ARM64in.LdSt8.amode );
3573 goto done;
3576 case ARM64in_XDirect: {
3577 /* NB: what goes on here has to be very closely coordinated
3578 with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
3579 /* We're generating chain-me requests here, so we need to be
3580 sure this is actually allowed -- no-redir translations
3581 can't use chain-me's. Hence: */
3582 vassert(disp_cp_chain_me_to_slowEP != NULL);
3583 vassert(disp_cp_chain_me_to_fastEP != NULL);
3585 /* Use ptmp for backpatching conditional jumps. */
3586 UInt* ptmp = NULL;
3588 /* First off, if this is conditional, create a conditional
3589 jump over the rest of it. Or at least, leave a space for
3590 it that we will shortly fill in. */
3591 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3592 vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
3593 ptmp = p;
3594 *p++ = 0;
3597 /* Update the guest PC. */
3598 /* imm64 x9, dstGA */
3599 /* str x9, amPC */
3600 p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
3601 p = do_load_or_store64(p, False/*!isLoad*/,
3602 /*x*/9, i->ARM64in.XDirect.amPC);
3604 /* --- FIRST PATCHABLE BYTE follows --- */
3605 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3606 calling to) backs up the return address, so as to find the
3607 address of the first patchable byte. So: don't change the
3608 number of instructions (5) below. */
3609 /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
3610 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
3611 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
3612 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
3613 /* blr x9 */
3614 const void* disp_cp_chain_me
3615 = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3616 : disp_cp_chain_me_to_slowEP;
3617 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
3618 *p++ = 0xD63F0120;
3619 /* --- END of PATCHABLE BYTES --- */
3621 /* Fix up the conditional jump, if there was one. */
3622 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3623 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3624 vassert(delta > 0 && delta <= 40);
3625 vassert((delta & 3) == 0);
3626 UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
3627 vassert(notCond <= 13); /* Neither AL nor NV */
3628 vassert(ptmp != NULL);
3629 delta = delta >> 2;
3630 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3632 goto done;
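/* Size check: the conditional-branch hole, the imm64 of dstGA (at most
   3 insns for a guest address that fits in 48 bits), the str, the
   exactly-4 immediate load and the blr add up to at most 10 insns,
   i.e. 40 bytes, which is what the 'delta <= 40' assertion above
   allows. */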
3635 case ARM64in_XIndir: {
3636 // XIndir is more or less the same as XAssisted, except
3637 // we don't have a trc value to hand back, so there's no
3638 // write to x21
3639 /* Use ptmp for backpatching conditional jumps. */
3640 //UInt* ptmp = NULL;
3642 /* First off, if this is conditional, create a conditional
3643 jump over the rest of it. Or at least, leave a space for
3644 it that we will shortly fill in. */
3645 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3646 vassert(0); //ATC
3647 //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3648 //ZZ ptmp = p;
3649 //ZZ *p++ = 0;
3652 /* Update the guest PC. */
3653 /* str r-dstGA, amPC */
3654 p = do_load_or_store64(p, False/*!isLoad*/,
3655 iregEnc(i->ARM64in.XIndir.dstGA),
3656 i->ARM64in.XIndir.amPC);
3658 /* imm64 x9, VG_(disp_cp_xindir) */
3659 /* br x9 */
3660 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
3661 *p++ = 0xD61F0120; /* br x9 */
3663 /* Fix up the conditional jump, if there was one. */
3664 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3665 vassert(0); //ATC
3666 //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3667 //ZZ vassert(delta > 0 && delta < 40);
3668 //ZZ vassert((delta & 3) == 0);
3669 //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3670 //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
3671 //ZZ delta = (delta >> 2) - 2;
3672 //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3674 goto done;
3677 case ARM64in_XAssisted: {
3678 /* Use ptmp for backpatching conditional jumps. */
3679 UInt* ptmp = NULL;
3681 /* First off, if this is conditional, create a conditional
3682 jump over the rest of it. Or at least, leave a space for
3683 it that we will shortly fill in. I think this can only
3684 ever happen when VEX is driven by the switchbacker. */
3685 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3686 vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
3687 ptmp = p;
3688 *p++ = 0;
3691 /* Update the guest PC. */
3692 /* str r-dstGA, amPC */
3693 p = do_load_or_store64(p, False/*!isLoad*/,
3694 iregEnc(i->ARM64in.XAssisted.dstGA),
3695 i->ARM64in.XAssisted.amPC);
3697 /* imm64 x21, $magic_number */
3698 UInt trcval = 0;
3699 switch (i->ARM64in.XAssisted.jk) {
3700 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3701 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3702 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3703 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3704 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3705 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3706 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3707 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3708 case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
3709 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3710 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3711 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3712 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3713 /* We don't expect to see the following being assisted. */
3714 //case Ijk_Ret:
3715 //case Ijk_Call:
3716 /* fallthrough */
3717 default:
3718 ppIRJumpKind(i->ARM64in.XAssisted.jk);
3719 vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
3720 "unexpected jump kind");
3722 vassert(trcval != 0);
3723 p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
3725 /* imm64 x9, VG_(disp_cp_xassisted) */
3726 /* br x9 */
3727 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
3728 *p++ = 0xD61F0120; /* br x9 */
3730 /* Fix up the conditional jump, if there was one. */
3731 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3732 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3733 vassert(delta > 0 && delta < 40);
3734 vassert((delta & 3) == 0);
3735 UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
3736 vassert(notCond <= 13); /* Neither AL nor NV */
3737 vassert(ptmp != NULL);
3738 delta = delta >> 2;
3739 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3741 goto done;
3744 case ARM64in_CSel: {
3745 /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
3746 UInt dd = iregEnc(i->ARM64in.CSel.dst);
3747 UInt nn = iregEnc(i->ARM64in.CSel.argL);
3748 UInt mm = iregEnc(i->ARM64in.CSel.argR);
3749 UInt cond = (UInt)i->ARM64in.CSel.cond;
3750 vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
3751 *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
3752 goto done;
3755 case ARM64in_Call: {
3756 /* We'll use x9 as a scratch register to put the target
3757 address in. */
3758 if (i->ARM64in.Call.cond != ARM64cc_AL
3759 && i->ARM64in.Call.rloc.pri != RLPri_None) {
3760 /* The call might not happen (it isn't unconditional) and
3761 it returns a result. In this case we will need to
3762 generate a control flow diamond to put 0x555..555 in
3763 the return register(s) in the case where the call
3764 doesn't happen. If this ever becomes necessary, maybe
3765 copy code from the 32-bit ARM equivalent. Until that
3766 day, just give up. */
3767 goto bad;
3770 UInt* ptmp = NULL;
3771 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3772 /* Create a hole to put a conditional branch in. We'll
3773 patch it once we know the branch length. */
3774 ptmp = p;
3775 *p++ = 0;
3778 // x9 = &target
3779 p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
3780 // blr x9
3781 *p++ = 0xD63F0120;
3783 // Patch the hole if necessary
3784 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3785 ULong dist = (ULong)(p - ptmp);
3786 /* imm64_to_ireg produces between 1 and 4 insns, and
3787 then there's the BLR itself. Hence: */
3788 vassert(dist >= 2 && dist <= 5);
3789 vassert(ptmp != NULL);
3790 // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
3791 *ptmp = X_8_19_1_4(X01010100, dist, 0,
3792 1 ^ (UInt)i->ARM64in.Call.cond);
3793 } else {
3794 vassert(ptmp == NULL);
3797 goto done;
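/* Note that 'dist' is a difference of UInt pointers, so it is already
   in units of instructions, exactly what B.cond's imm19 field wants,
   which is why there is no '>> 2' here, unlike the byte-based delta
   used for XDirect/XAssisted above. */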
3800 case ARM64in_AddToSP: {
3801 /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
3802 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
3804 Int simm12 = i->ARM64in.AddToSP.simm;
3805 vassert(-4096 < simm12 && simm12 < 4096);
3806 vassert(0 == (simm12 & 0xF));
3807 if (simm12 >= 0) {
3808 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
3809 } else {
3810 *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
3812 goto done;
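/* The '& 0xF' check keeps every SP adjustment a multiple of 16, so the
   16-byte alignment that AArch64 expects for SP-relative accesses
   should be preserved across these adjustments. */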
3815 case ARM64in_FromSP: {
3816 /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
3817 UInt dd = iregEnc(i->ARM64in.FromSP.dst);
3818 vassert(dd < 31);
3819 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
3820 goto done;
3823 case ARM64in_Mul: {
3824 /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
3825 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
3826 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
3828 UInt dd = iregEnc(i->ARM64in.Mul.dst);
3829 UInt nn = iregEnc(i->ARM64in.Mul.argL);
3830 UInt mm = iregEnc(i->ARM64in.Mul.argR);
3831 vassert(dd < 31 && nn < 31 && mm < 31);
3832 switch (i->ARM64in.Mul.op) {
3833 case ARM64mul_ZX:
3834 *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
3835 goto done;
3836 case ARM64mul_SX:
3837 *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
3838 goto done;
3839 case ARM64mul_PLAIN:
3840 *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
3841 goto done;
3842 default:
3843 vassert(0);
3845 goto bad;
3847 case ARM64in_LdrEX: {
3848 /* 085F7C82 ldxrb w2, [x4]
3849 485F7C82 ldxrh w2, [x4]
3850 885F7C82 ldxr w2, [x4]
3851 C85F7C82 ldxr x2, [x4]
3853 switch (i->ARM64in.LdrEX.szB) {
3854 case 1: *p++ = 0x085F7C82; goto done;
3855 case 2: *p++ = 0x485F7C82; goto done;
3856 case 4: *p++ = 0x885F7C82; goto done;
3857 case 8: *p++ = 0xC85F7C82; goto done;
3858 default: break;
3860 goto bad;
3862 case ARM64in_StrEX: {
3863 /* 08007C82 stxrb w0, w2, [x4]
3864 48007C82 stxrh w0, w2, [x4]
3865 88007C82 stxr w0, w2, [x4]
3866 C8007C82 stxr w0, x2, [x4]
3868 switch (i->ARM64in.StrEX.szB) {
3869 case 1: *p++ = 0x08007C82; goto done;
3870 case 2: *p++ = 0x48007C82; goto done;
3871 case 4: *p++ = 0x88007C82; goto done;
3872 case 8: *p++ = 0xC8007C82; goto done;
3873 default: break;
3875 goto bad;
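/* Both LdrEX and StrEX use fixed registers: the address is always in
   x4, the data in x2/w2, and the store-exclusive status ends up in w0
   (e.g. 0xC8007C82 above is stxr w0, x2, [x4]).  The instruction
   selector is expected to shuffle values into and out of those
   registers around this instruction. */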
3877 case ARM64in_CAS: {
3878 /* This isn't simple. For an explanation see the comment in
3879 host_arm64_defs.h on the definition of ARM64Instr case CAS.
3881 NOTE: We could place "loop:" after mov/and but then we need
3882 an additional scratch register.
3884 /* Generate:
3886 loop:
3887 -- one of:
3888 mov x8, x5 // AA0503E8
3889 and x8, x5, #0xFFFFFFFF // 92407CA8
3890 and x8, x5, #0xFFFF // 92403CA8
3891 and x8, x5, #0xFF // 92401CA8
3893 -- one of:
3894 ldxr x1, [x3] // C85F7C61
3895 ldxr w1, [x3] // 885F7C61
3896 ldxrh w1, [x3] // 485F7C61
3897 ldxrb w1, [x3] // 085F7C61
3899 -- always:
3900 cmp x1, x8 // EB08003F
3901 bne out // 54000061
3903 -- one of:
3904 stxr w8, x7, [x3] // C8087C67
3905 stxr w8, w7, [x3] // 88087C67
3906 stxrh w8, w7, [x3] // 48087C67
3907 stxrb w8, w7, [x3] // 08087C67
3909 -- always:
3910 cbnz w8, loop // 35FFFF68
3911 out:
3913 switch (i->ARM64in.CAS.szB) {
3914 case 8: *p++ = 0xAA0503E8; break;
3915 case 4: *p++ = 0x92407CA8; break;
3916 case 2: *p++ = 0x92403CA8; break;
3917 case 1: *p++ = 0x92401CA8; break;
3918 default: vassert(0);
3920 switch (i->ARM64in.CAS.szB) {
3921 case 8: *p++ = 0xC85F7C61; break;
3922 case 4: *p++ = 0x885F7C61; break;
3923 case 2: *p++ = 0x485F7C61; break;
3924 case 1: *p++ = 0x085F7C61; break;
3926 *p++ = 0xEB08003F;
3927 *p++ = 0x54000061;
3928 switch (i->ARM64in.CAS.szB) {
3929 case 8: *p++ = 0xC8087C67; break;
3930 case 4: *p++ = 0x88087C67; break;
3931 case 2: *p++ = 0x48087C67; break;
3932 case 1: *p++ = 0x08087C67; break;
3934 *p++ = 0x35FFFF68;
3935 goto done;
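/* Quick decode of the fixed words above, as a sanity check:
   0xEB08003F is cmp x1, x8 (SUBS xzr, x1, x8), 0x54000061 is b.ne
   forward by 3 insns to 'out', and 0x35FFFF68 is cbnz w8 back by 5
   insns to 'loop'.  The leading mov/and decides how much of the
   expected value in x5 takes part in the comparison, which is what
   lets one loop body serve all four access sizes. */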
3937 case ARM64in_CASP: {
3938 /* Generate:
3939 CASP <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>{,#0}]
3941 Register allocation (see ARM64in_CASP in getRegUsage_ARM64Instr):
3942 Xn: memory address
3943 -> X2 (INPUT)
3944 Xs, X(s+1): values to be compared with value read from address
3945 -> X4,X5 (INPUTS)
3946 -> X0,X1 (OUTPUTS) loaded from memory and compared with
3947 scratch registers X8,X9 (CLOBBERED) which contain
3948 contents of X4,X5
3949 Xt, X(t+1): values to be stored to memory if X0,X1==X8,X9
3950 -> X6,X7 (INPUT)
3952 loop:
3953 -- two of:
3954 mov x8, x4 // AA0403E8
3955 mov x9, x5 // AA0503E9
3956 and x8, x4, #0xFFFFFFFF // 92407C88
3957 and x9, x5, #0xFFFFFFFF // 92407CA9
3959 -- one of:
3960 ldxp x0,x1, [x2] // C87F0440
3961 ldxp w0,w1, [x2] // 887F0440
3963 -- always:
3964 cmp x0, x8 // EB08001F
3965 bne out // 540000A1 (b.ne #20 <out>)
3966 cmp x1, x9 // EB09003F
3967 bne out // 54000061 (b.ne #12 <out>)
3969 -- one of:
3970 stxp w1, x6, x7, [x2] // C8211C46
3971 stxp w1, w6, w7, [x2] // 88211C46
3973 -- always:
3974 cbnz w1, loop // 35FFFF01 (cbnz w1, #-32 <loop>)
3975 out:
3977 switch (i->ARM64in.CASP.szB) {
3978 case 8: *p++ = 0xAA0403E8; *p++ = 0xAA0503E9; break;
3979 case 4: *p++ = 0x92407C88; *p++ = 0x92407CA9; break;
3980 default: vassert(0);
3982 switch (i->ARM64in.CASP.szB) {
3983 case 8: *p++ = 0xC87F0440; break;
3984 case 4: *p++ = 0x887F0440; break;
3985 default: vassert(0);
3987 *p++ = 0xEB08001F;
3988 *p++ = 0x540000A1;
3989 *p++ = 0xEB09003F;
3990 *p++ = 0x54000061;
3991 switch (i->ARM64in.CASP.szB) {
3992 case 8: *p++ = 0xC8211C46; break;
3993 case 4: *p++ = 0x88211C46; break;
3994 default: vassert(0);
3996 *p++ = 0x35FFFF01;
3997 goto done;
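/* As with ARM64in_CAS, this relies only on base-ISA exclusives
   (LDXP/STXP), not on the v8.1 CASP instruction itself, so the
   generated code should run on any AArch64 host.  0xC87F0440 is
   ldxp x0, x1, [x2] and 0xC8211C46 is stxp w1, x6, x7, [x2]; the
   leading mov/and pair masks the expected values in x4/x5 down to the
   element size before the pairwise comparison. */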
3999 case ARM64in_MFence: {
4000 *p++ = 0xD5033F9F; /* DSB sy */
4001 *p++ = 0xD5033FBF; /* DMB sy */
4002 *p++ = 0xD5033FDF; /* ISB */
4003 goto done;
4005 case ARM64in_ClrEX: {
4006 *p++ = 0xD5033F5F; /* clrex #15 */
4007 goto done;
4009 case ARM64in_VLdStH: {
4010 /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
4011 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
4013 UInt hD = dregEnc(i->ARM64in.VLdStH.hD);
4014 UInt rN = iregEnc(i->ARM64in.VLdStH.rN);
4015 UInt uimm12 = i->ARM64in.VLdStH.uimm12;
4016 Bool isLD = i->ARM64in.VLdStH.isLoad;
4017 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
4018 uimm12 >>= 1;
4019 vassert(uimm12 < (1<<12));
4020 vassert(hD < 32);
4021 vassert(rN < 31);
4022 *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
4023 uimm12, rN, hD);
4024 goto done;
4026 case ARM64in_VLdStS: {
4027 /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
4028 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
4030 UInt sD = dregEnc(i->ARM64in.VLdStS.sD);
4031 UInt rN = iregEnc(i->ARM64in.VLdStS.rN);
4032 UInt uimm12 = i->ARM64in.VLdStS.uimm12;
4033 Bool isLD = i->ARM64in.VLdStS.isLoad;
4034 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
4035 uimm12 >>= 2;
4036 vassert(uimm12 < (1<<12));
4037 vassert(sD < 32);
4038 vassert(rN < 31);
4039 *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
4040 uimm12, rN, sD);
4041 goto done;
4043 case ARM64in_VLdStD: {
4044 /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
4045 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
4047 UInt dD = dregEnc(i->ARM64in.VLdStD.dD);
4048 UInt rN = iregEnc(i->ARM64in.VLdStD.rN);
4049 UInt uimm12 = i->ARM64in.VLdStD.uimm12;
4050 Bool isLD = i->ARM64in.VLdStD.isLoad;
4051 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
4052 uimm12 >>= 3;
4053 vassert(uimm12 < (1<<12));
4054 vassert(dD < 32);
4055 vassert(rN < 31);
4056 *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
4057 uimm12, rN, dD);
4058 goto done;
4060 case ARM64in_VLdStQ: {
4061 /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
4062 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
4064 UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
4065 UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
4066 vassert(rQ < 32);
4067 vassert(rN < 31);
4068 if (i->ARM64in.VLdStQ.isLoad) {
4069 *p++ = 0x4C407C00 | (rN << 5) | rQ;
4070 } else {
4071 *p++ = 0x4C007C00 | (rN << 5) | rQ;
4073 goto done;
4075 case ARM64in_VCvtI2F: {
4076 /* 31 28 23 21 20 18 15 9 4
4077 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
4078 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
4079 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
4080 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
4081 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
4082 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
4083 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
4084 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
4086 UInt rN = iregEnc(i->ARM64in.VCvtI2F.rS);
4087 UInt rD = dregEnc(i->ARM64in.VCvtI2F.rD);
4088 ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
4089 /* Just handle cases as they show up. */
4090 switch (how) {
4091 case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
4092 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
4093 break;
4094 case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
4095 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
4096 break;
4097 case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
4098 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
4099 break;
4100 case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
4101 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
4102 break;
4103 case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
4104 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
4105 break;
4106 case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
4107 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
4108 break;
4109 case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
4110 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
4111 break;
4112 case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
4113 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
4114 break;
4115 default:
4116 goto bad; //ATC
4118 goto done;
4120 case ARM64in_VCvtF2I: {
4121 /* 30 23 20 18 15 9 4
4122 sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
4123 sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
4124 ---------------- 01 -------------- FCVTP-------- (round to +inf)
4125 ---------------- 10 -------------- FCVTM-------- (round to -inf)
4126 ---------------- 11 -------------- FCVTZ-------- (round to zero)
4128 Rd is Xd when sf==1, Wd when sf==0
4129 Fn is Dn when x==1, Sn when x==0
4130 20:19 carry the rounding mode, using the same encoding as FPCR
4132 UInt rD = iregEnc(i->ARM64in.VCvtF2I.rD);
4133 UInt rN = dregEnc(i->ARM64in.VCvtF2I.rS);
4134 ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
4135 UChar armRM = i->ARM64in.VCvtF2I.armRM;
4136 /* Just handle cases as they show up. */
4137 switch (how) {
4138 case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
4139 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
4140 X000000, rN, rD);
4141 break;
4142 case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
4143 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
4144 X000000, rN, rD);
4145 break;
4146 case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
4147 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
4148 X000000, rN, rD);
4149 break;
4150 case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
4151 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
4152 X000000, rN, rD);
4153 break;
4154 case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
4155 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
4156 X000000, rN, rD);
4157 break;
4158 case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
4159 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
4160 X000000, rN, rD);
4161 break;
4162 case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
4163 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
4164 X000000, rN, rD);
4165 break;
4166 case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
4167 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
4168 X000000, rN, rD);
4169 break;
4170 default:
4171 goto bad; //ATC
4173 goto done;
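/* Example: FCVTZS x0, d1 (round towards zero) is armRM == 3, so the
   8-bit field becomes X01100000 | (3 << 3) = X01111000 and the word
   should come out as X_3_5_8_6_5_5(X100, X11110, X01111000, X000000,
   1, 0) = 0x9E780020. */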
4175 case ARM64in_VCvtSD: {
4176 /* 31 23 21 16 14 9 4
4177 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
4178 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
4179 Rounding, when dst is smaller than src, is per the FPCR.
4181 UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
4182 UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
4183 if (i->ARM64in.VCvtSD.sToD) {
4184 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
4185 } else {
4186 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
4188 goto done;
4190 case ARM64in_VCvtHS: {
4191 /* 31 23 21 16 14 9 4
4192 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
4193 ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
4194 Rounding, when dst is smaller than src, is per the FPCR.
4196 UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
4197 UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
4198 if (i->ARM64in.VCvtHS.hToS) {
4199 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
4200 } else {
4201 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
4203 goto done;
4205 case ARM64in_VCvtHD: {
4206 /* 31 23 21 16 14 9 4
4207 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
4208 ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
4209 Rounding, when dst is smaller than src, is per the FPCR.
4211 UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
4212 UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
4213 if (i->ARM64in.VCvtHD.hToD) {
4214 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
4215 } else {
4216 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
4218 goto done;
4220 case ARM64in_VUnaryD: {
4221 /* 31 23 21 16 14 9 4
4222 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
4223 ------------------- 0,1 --------- FABS ------
4224 ------------------- 1,0 --------- FNEG ------
4225 ------------------- 1,1 --------- FSQRT -----
4227 UInt dD = dregEnc(i->ARM64in.VUnaryD.dst);
4228 UInt dN = dregEnc(i->ARM64in.VUnaryD.src);
4229 UInt b16 = 2; /* impossible */
4230 UInt b15 = 2; /* impossible */
4231 switch (i->ARM64in.VUnaryD.op) {
4232 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4233 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4234 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4235 default: break;
4237 if (b16 < 2 && b15 < 2) {
4238 *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
4239 (b15 << 5) | X10000, dN, dD);
4240 goto done;
4243 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR)
4245 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
4246 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
4247 goto done;
4250 010, 11110 11 1,0000 1,1111 10 n d FRECPX Dd, Dm
4252 if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
4253 *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
4254 goto done;
4256 goto bad;
4258 case ARM64in_VUnaryS: {
4259 /* 31 23 21 16 14 9 4
4260 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
4261 ------------------- 0,1 --------- FABS ------
4262 ------------------- 1,0 --------- FNEG ------
4263 ------------------- 1,1 --------- FSQRT -----
4265 UInt sD = dregEnc(i->ARM64in.VUnaryS.dst);
4266 UInt sN = dregEnc(i->ARM64in.VUnaryS.src);
4267 UInt b16 = 2; /* impossible */
4268 UInt b15 = 2; /* impossible */
4269 switch (i->ARM64in.VUnaryS.op) {
4270 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4271 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4272 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4273 default: break;
4275 if (b16 < 2 && b15 < 2) {
4276 *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
4277 (b15 << 5) | X10000, sN, sD);
4278 goto done;
4281 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sm (round per FPCR)
4283 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
4284 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
4285 goto done;
4288 010, 11110 10 1,0000 1,1111 10 n d FRECPX Sd, Sm
4290 if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
4291 *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
4292 goto done;
4294 goto bad;
4296 case ARM64in_VBinD: {
4297 /* 31 23 20 15 11 9 4
4298 ---------------- 0000 ------ FMUL --------
4299 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
4300 ---------------- 0010 ------ FADD --------
4301 ---------------- 0011 ------ FSUB --------
4303 UInt dD = dregEnc(i->ARM64in.VBinD.dst);
4304 UInt dN = dregEnc(i->ARM64in.VBinD.argL);
4305 UInt dM = dregEnc(i->ARM64in.VBinD.argR);
4306 UInt b1512 = 16; /* impossible */
4307 switch (i->ARM64in.VBinD.op) {
4308 case ARM64fpb_DIV: b1512 = X0001; break;
4309 case ARM64fpb_MUL: b1512 = X0000; break;
4310 case ARM64fpb_SUB: b1512 = X0011; break;
4311 case ARM64fpb_ADD: b1512 = X0010; break;
4312 default: goto bad;
4314 vassert(b1512 < 16);
4315 *p++
4316 = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
4317 goto done;
4319 case ARM64in_VBinS: {
4320 /* 31 23 20 15 11 9 4
4321 ---------------- 0000 ------ FMUL --------
4322 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
4323 ---------------- 0010 ------ FADD --------
4324 ---------------- 0011 ------ FSUB --------
4326 UInt sD = dregEnc(i->ARM64in.VBinS.dst);
4327 UInt sN = dregEnc(i->ARM64in.VBinS.argL);
4328 UInt sM = dregEnc(i->ARM64in.VBinS.argR);
4329 UInt b1512 = 16; /* impossible */
4330 switch (i->ARM64in.VBinS.op) {
4331 case ARM64fpb_DIV: b1512 = X0001; break;
4332 case ARM64fpb_MUL: b1512 = X0000; break;
4333 case ARM64fpb_SUB: b1512 = X0011; break;
4334 case ARM64fpb_ADD: b1512 = X0010; break;
4335 default: goto bad;
4337 vassert(b1512 < 16);
4338 *p++
4339 = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
4340 goto done;
4342 case ARM64in_VCmpD: {
4343 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
4344 UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
4345 UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
4346 *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
4347 goto done;
4349 case ARM64in_VCmpS: {
4350 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
4351 UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
4352 UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
4353 *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
4354 goto done;
4356 case ARM64in_VFCSel: {
4357 /* 31 23 21 20 15 11 9 5
4358 000 11110 00 1 m cond 11 n d FCSEL Sd,Sn,Sm,cond
4359 000 11110 01 1 m cond 11 n d FCSEL Dd,Dn,Dm,cond
4361 Bool isD = i->ARM64in.VFCSel.isD;
4362 UInt dd = dregEnc(i->ARM64in.VFCSel.dst);
4363 UInt nn = dregEnc(i->ARM64in.VFCSel.argL);
4364 UInt mm = dregEnc(i->ARM64in.VFCSel.argR);
4365 UInt cond = (UInt)i->ARM64in.VFCSel.cond;
4366 vassert(cond < 16);
4367 *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
4368 mm, (cond << 2) | X000011, nn, dd);
4369 goto done;
4371 case ARM64in_FPCR: {
4372 Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4373 UInt iReg = iregEnc(i->ARM64in.FPCR.iReg);
4374 if (toFPCR) {
4375 /* 0xD51B44 000 Rt MSR fpcr, rT */
4376 *p++ = 0xD51B4400 | (iReg & 0x1F);
4377 goto done;
4379 goto bad; // FPCR -> iReg case currently ATC
4381 case ARM64in_FPSR: {
4382 Bool toFPSR = i->ARM64in.FPSR.toFPSR;
4383 UInt iReg = iregEnc(i->ARM64in.FPSR.iReg);
4384 if (toFPSR) {
4385 /* 0xD51B44 001 Rt MSR fpsr, rT */
4386 *p++ = 0xD51B4420 | (iReg & 0x1F);
4387 } else {
4388 /* 0xD53B44 001 Rt MRS rT, fpsr */
4389 *p++ = 0xD53B4420 | (iReg & 0x1F);
4391 goto done;
4393 case ARM64in_VBinV: {
4394 /* 31 23 20 15 9 4
4395 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4396 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4397 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4398 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4400 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4401 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4402 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
4403 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
4405 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4406 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4407 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
4409 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
4410 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
4411 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
4412 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
4414 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
4415 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
4416 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
4417 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
4419 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4420 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4421 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4422 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4424 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4425 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4426 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4428 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4429 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4430 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
4432 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4433 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4434 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
4436 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4437 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4438 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
4440 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4441 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
4442 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4444 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4445 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4446 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4447 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4449 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4450 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4451 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4452 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4454 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4455 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4456 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4457 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
4459 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4460 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4462 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4463 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4465 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4466 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
4468 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4470 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4471 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4472 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4473 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4475 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4476 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4477 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4478 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4480 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4481 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4482 010 01110 00 0 m 001110 n d ZIP1 Vd.16b, Vn.16b, Vm.16b
4484 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4485 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4486 010 01110 00 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
4488 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
4490 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
4492 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4493 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4494 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4496 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4497 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4498 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
4500 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4501 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4502 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4503 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4505 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4506 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4507 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4508 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4510 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4511 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4512 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4513 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4515 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4516 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4517 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4518 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
4520 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4521 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
4523 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4524 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4525 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4526 011 01110 01 1 m 101101 n d SQRDMULH Vd.8h, Vn.8h, Vm.8h
4528 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4529 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4530 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4531 011 01110 sz 1 m 010111 n d UQRSHL@sz Vd, Vn, Vm
4533 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4534 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
4535 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
4536 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
4538 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
4539 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
4540 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
4541 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
4543 UInt vD = qregEnc(i->ARM64in.VBinV.dst);
4544 UInt vN = qregEnc(i->ARM64in.VBinV.argL);
4545 UInt vM = qregEnc(i->ARM64in.VBinV.argR);
4546 switch (i->ARM64in.VBinV.op) {
4547 case ARM64vecb_ADD64x2:
4548 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
4549 break;
4550 case ARM64vecb_ADD32x4:
4551 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
4552 break;
4553 case ARM64vecb_ADD16x8:
4554 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
4555 break;
4556 case ARM64vecb_ADD8x16:
4557 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
4558 break;
4559 case ARM64vecb_SUB64x2:
4560 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
4561 break;
4562 case ARM64vecb_SUB32x4:
4563 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
4564 break;
4565 case ARM64vecb_SUB16x8:
4566 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
4567 break;
4568 case ARM64vecb_SUB8x16:
4569 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
4570 break;
4571 case ARM64vecb_MUL32x4:
4572 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
4573 break;
4574 case ARM64vecb_MUL16x8:
4575 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
4576 break;
4577 case ARM64vecb_MUL8x16:
4578 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
4579 break;
4580 case ARM64vecb_FADD64x2:
4581 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
4582 break;
4583 case ARM64vecb_FADD32x4:
4584 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
4585 break;
4586 case ARM64vecb_FSUB64x2:
4587 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
4588 break;
4589 case ARM64vecb_FSUB32x4:
4590 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
4591 break;
4592 case ARM64vecb_FMUL64x2:
4593 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
4594 break;
4595 case ARM64vecb_FMUL32x4:
4596 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
4597 break;
4598 case ARM64vecb_FDIV64x2:
4599 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
4600 break;
4601 case ARM64vecb_FDIV32x4:
4602 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
4603 break;
4605 case ARM64vecb_FMAX64x2:
4606 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
4607 break;
4608 case ARM64vecb_FMAX32x4:
4609 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
4610 break;
4611 case ARM64vecb_FMIN64x2:
4612 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
4613 break;
4614 case ARM64vecb_FMIN32x4:
4615 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
4616 break;
4618 case ARM64vecb_UMAX32x4:
4619 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
4620 break;
4621 case ARM64vecb_UMAX16x8:
4622 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
4623 break;
4624 case ARM64vecb_UMAX8x16:
4625 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
4626 break;
4628 case ARM64vecb_UMIN32x4:
4629 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
4630 break;
4631 case ARM64vecb_UMIN16x8:
4632 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
4633 break;
4634 case ARM64vecb_UMIN8x16:
4635 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
4636 break;
4638 case ARM64vecb_SMAX32x4:
4639 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
4640 break;
4641 case ARM64vecb_SMAX16x8:
4642 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
4643 break;
4644 case ARM64vecb_SMAX8x16:
4645 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
4646 break;
4648 case ARM64vecb_SMIN32x4:
4649 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
4650 break;
4651 case ARM64vecb_SMIN16x8:
4652 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
4653 break;
4654 case ARM64vecb_SMIN8x16:
4655 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
4656 break;
4658 case ARM64vecb_AND:
4659 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
4660 break;
4661 case ARM64vecb_ORR:
4662 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
4663 break;
4664 case ARM64vecb_XOR:
4665 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
4666 break;
4668 case ARM64vecb_CMEQ64x2:
4669 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
4670 break;
4671 case ARM64vecb_CMEQ32x4:
4672 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
4673 break;
4674 case ARM64vecb_CMEQ16x8:
4675 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
4676 break;
4677 case ARM64vecb_CMEQ8x16:
4678 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
4679 break;
4681 case ARM64vecb_CMHI64x2:
4682 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
4683 break;
4684 case ARM64vecb_CMHI32x4:
4685 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
4686 break;
4687 case ARM64vecb_CMHI16x8:
4688 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
4689 break;
4690 case ARM64vecb_CMHI8x16:
4691 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
4692 break;
4694 case ARM64vecb_CMGT64x2:
4695 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
4696 break;
4697 case ARM64vecb_CMGT32x4:
4698 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
4699 break;
4700 case ARM64vecb_CMGT16x8:
4701 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
4702 break;
4703 case ARM64vecb_CMGT8x16:
4704 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
4705 break;
4707 case ARM64vecb_FCMEQ64x2:
4708 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
4709 break;
4710 case ARM64vecb_FCMEQ32x4:
4711 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
4712 break;
4714 case ARM64vecb_FCMGE64x2:
4715 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
4716 break;
4717 case ARM64vecb_FCMGE32x4:
4718 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
4719 break;
4721 case ARM64vecb_FCMGT64x2:
4722 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
4723 break;
4724 case ARM64vecb_FCMGT32x4:
4725 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
4726 break;
4728 case ARM64vecb_TBL1:
4729 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
4730 break;
4732 case ARM64vecb_UZP164x2:
4733 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
4734 break;
4735 case ARM64vecb_UZP132x4:
4736 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
4737 break;
4738 case ARM64vecb_UZP116x8:
4739 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
4740 break;
4741 case ARM64vecb_UZP18x16:
4742 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
4743 break;
4745 case ARM64vecb_UZP264x2:
4746 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
4747 break;
4748 case ARM64vecb_UZP232x4:
4749 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
4750 break;
4751 case ARM64vecb_UZP216x8:
4752 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
4753 break;
4754 case ARM64vecb_UZP28x16:
4755 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
4756 break;
4758 case ARM64vecb_ZIP132x4:
4759 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
4760 break;
4761 case ARM64vecb_ZIP116x8:
4762 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
4763 break;
4764 case ARM64vecb_ZIP18x16:
4765 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
4766 break;
4768 case ARM64vecb_ZIP232x4:
4769 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
4770 break;
4771 case ARM64vecb_ZIP216x8:
4772 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
4773 break;
4774 case ARM64vecb_ZIP28x16:
4775 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
4776 break;
4778 case ARM64vecb_PMUL8x16:
4779 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
4780 break;
4782 case ARM64vecb_PMULL8x8:
4783 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
4784 break;
4786 case ARM64vecb_UMULL2DSS:
4787 *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
4788 break;
4789 case ARM64vecb_UMULL4SHH:
4790 *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
4791 break;
4792 case ARM64vecb_UMULL8HBB:
4793 *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
4794 break;
4796 case ARM64vecb_SMULL2DSS:
4797 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
4798 break;
4799 case ARM64vecb_SMULL4SHH:
4800 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
4801 break;
4802 case ARM64vecb_SMULL8HBB:
4803 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
4804 break;
4806 case ARM64vecb_SQADD64x2:
4807 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
4808 break;
4809 case ARM64vecb_SQADD32x4:
4810 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
4811 break;
4812 case ARM64vecb_SQADD16x8:
4813 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
4814 break;
4815 case ARM64vecb_SQADD8x16:
4816 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
4817 break;
4819 case ARM64vecb_UQADD64x2:
4820 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
4821 break;
4822 case ARM64vecb_UQADD32x4:
4823 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
4824 break;
4825 case ARM64vecb_UQADD16x8:
4826 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
4827 break;
4828 case ARM64vecb_UQADD8x16:
4829 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
4830 break;
4832 case ARM64vecb_SQSUB64x2:
4833 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
4834 break;
4835 case ARM64vecb_SQSUB32x4:
4836 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
4837 break;
4838 case ARM64vecb_SQSUB16x8:
4839 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
4840 break;
4841 case ARM64vecb_SQSUB8x16:
4842 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
4843 break;
4845 case ARM64vecb_UQSUB64x2:
4846 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
4847 break;
4848 case ARM64vecb_UQSUB32x4:
4849 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
4850 break;
4851 case ARM64vecb_UQSUB16x8:
4852 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
4853 break;
4854 case ARM64vecb_UQSUB8x16:
4855 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
4856 break;
4858 case ARM64vecb_SQDMULL2DSS:
4859 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
4860 break;
4861 case ARM64vecb_SQDMULL4SHH:
4862 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
4863 break;
4865 case ARM64vecb_SQDMULH32x4:
4866 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
4867 break;
4868 case ARM64vecb_SQDMULH16x8:
4869 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
4870 break;
4871 case ARM64vecb_SQRDMULH32x4:
4872 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
4873 break;
4874 case ARM64vecb_SQRDMULH16x8:
4875 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
4876 break;
4878 case ARM64vecb_SQSHL64x2:
4879 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
4880 break;
4881 case ARM64vecb_SQSHL32x4:
4882 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
4883 break;
4884 case ARM64vecb_SQSHL16x8:
4885 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
4886 break;
4887 case ARM64vecb_SQSHL8x16:
4888 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
4889 break;
4891 case ARM64vecb_SQRSHL64x2:
4892 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
4893 break;
4894 case ARM64vecb_SQRSHL32x4:
4895 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
4896 break;
4897 case ARM64vecb_SQRSHL16x8:
4898 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
4899 break;
4900 case ARM64vecb_SQRSHL8x16:
4901 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
4902 break;
4904 case ARM64vecb_UQSHL64x2:
4905 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
4906 break;
4907 case ARM64vecb_UQSHL32x4:
4908 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
4909 break;
4910 case ARM64vecb_UQSHL16x8:
4911 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
4912 break;
4913 case ARM64vecb_UQSHL8x16:
4914 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
4915 break;
4917 case ARM64vecb_UQRSHL64x2:
4918 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
4919 break;
4920 case ARM64vecb_UQRSHL32x4:
4921 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
4922 break;
4923 case ARM64vecb_UQRSHL16x8:
4924 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
4925 break;
4926 case ARM64vecb_UQRSHL8x16:
4927 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
4928 break;
4930 case ARM64vecb_SSHL64x2:
4931 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
4932 break;
4933 case ARM64vecb_SSHL32x4:
4934 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
4935 break;
4936 case ARM64vecb_SSHL16x8:
4937 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
4938 break;
4939 case ARM64vecb_SSHL8x16:
4940 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
4941 break;
4943 case ARM64vecb_SRSHL64x2:
4944 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
4945 break;
4946 case ARM64vecb_SRSHL32x4:
4947 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
4948 break;
4949 case ARM64vecb_SRSHL16x8:
4950 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
4951 break;
4952 case ARM64vecb_SRSHL8x16:
4953 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
4954 break;
4956 case ARM64vecb_USHL64x2:
4957 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
4958 break;
4959 case ARM64vecb_USHL32x4:
4960 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
4961 break;
4962 case ARM64vecb_USHL16x8:
4963 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
4964 break;
4965 case ARM64vecb_USHL8x16:
4966 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
4967 break;
4969 case ARM64vecb_URSHL64x2:
4970 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
4971 break;
4972 case ARM64vecb_URSHL32x4:
4973 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
4974 break;
4975 case ARM64vecb_URSHL16x8:
4976 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
4977 break;
4978 case ARM64vecb_URSHL8x16:
4979 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
4980 break;
4982 case ARM64vecb_FRECPS64x2:
4983 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
4984 break;
4985 case ARM64vecb_FRECPS32x4:
4986 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
4987 break;
4988 case ARM64vecb_FRSQRTS64x2:
4989 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
4990 break;
4991 case ARM64vecb_FRSQRTS32x4:
4992 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
4993 break;
4995 default:
4996 goto bad;
4997 }
4998 goto done;
4999 }
5000 case ARM64in_VModifyV: {
5001 /* 31 23 20 15 9 4
5002 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
5003 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
5004 */
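/* NB (editorial addition): SUQADD/USQADD accumulate into the destination,
   which is presumably why the field is called "mod" -- Vd is read as well
   as written, so the instruction must be treated as modifying, not merely
   defining, its output register. */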
5005 UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
5006 UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
5007 switch (i->ARM64in.VModifyV.op) {
5008 case ARM64vecmo_SUQADD64x2:
5009 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
5010 break;
5011 case ARM64vecmo_SUQADD32x4:
5012 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
5013 break;
5014 case ARM64vecmo_SUQADD16x8:
5015 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
5016 break;
5017 case ARM64vecmo_SUQADD8x16:
5018 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
5019 break;
5020 case ARM64vecmo_USQADD64x2:
5021 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
5022 break;
5023 case ARM64vecmo_USQADD32x4:
5024 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
5025 break;
5026 case ARM64vecmo_USQADD16x8:
5027 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
5028 break;
5029 case ARM64vecmo_USQADD8x16:
5030 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
5031 break;
5032 default:
5033 goto bad;
5034 }
5035 goto done;
5036 }
5037 case ARM64in_VUnaryV: {
5038 /* 31 23 20 15 9 4
5039 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
5040 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
5041 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
5042 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
5043 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
5045 010 01110 11 1 00000 101110 n d ABS Vd.2d, Vn.2d
5046 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
5047 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
5048 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
5050 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
5051 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
5052 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
5054 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
5055 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
5056 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
5058 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
5060 011 01110 01 1 00000 010110 n d RBIT Vd.16b, Vn.16b
5061 010 01110 00 1 00000 000110 n d REV16 Vd.16b, Vn.16b
5062 011 01110 00 1 00000 000010 n d REV32 Vd.16b, Vn.16b
5063 011 01110 01 1 00000 000010 n d REV32 Vd.8h, Vn.8h
5065 010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
5066 010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
5067 010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
5069 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
5070 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
5072 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
5073 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
5075 011 01110 11 1 00001 110110 n d FRSQRTE Vd.2d, Vn.2d
5076 011 01110 10 1 00001 110110 n d FRSQRTE Vd.4s, Vn.4s
5078 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d
5079 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s
5080 */
5081 UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
5082 UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
5083 switch (i->ARM64in.VUnaryV.op) {
5084 case ARM64vecu_FABS64x2:
5085 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
5086 break;
5087 case ARM64vecu_FABS32x4:
5088 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
5089 break;
5090 case ARM64vecu_FNEG64x2:
5091 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
5092 break;
5093 case ARM64vecu_FNEG32x4:
5094 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
5095 break;
5096 case ARM64vecu_NOT:
5097 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
5098 break;
5099 case ARM64vecu_ABS64x2:
5100 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
5101 break;
5102 case ARM64vecu_ABS32x4:
5103 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
5104 break;
5105 case ARM64vecu_ABS16x8:
5106 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
5107 break;
5108 case ARM64vecu_ABS8x16:
5109 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
5110 break;
5111 case ARM64vecu_CLS32x4:
5112 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
5113 break;
5114 case ARM64vecu_CLS16x8:
5115 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
5116 break;
5117 case ARM64vecu_CLS8x16:
5118 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
5119 break;
5120 case ARM64vecu_CLZ32x4:
5121 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
5122 break;
5123 case ARM64vecu_CLZ16x8:
5124 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
5125 break;
5126 case ARM64vecu_CLZ8x16:
5127 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
5128 break;
5129 case ARM64vecu_CNT8x16:
5130 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
5131 break;
5132 case ARM64vecu_RBIT:
5133 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
5134 break;
5135 case ARM64vecu_REV1616B:
5136 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
5137 break;
5138 case ARM64vecu_REV3216B:
5139 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
5140 break;
5141 case ARM64vecu_REV328H:
5142 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
5143 break;
5144 case ARM64vecu_REV6416B:
5145 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
5146 break;
5147 case ARM64vecu_REV648H:
5148 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
5149 break;
5150 case ARM64vecu_REV644S:
5151 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
5152 break;
5153 case ARM64vecu_URECPE32x4:
5154 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
5155 break;
5156 case ARM64vecu_URSQRTE32x4:
5157 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
5158 break;
5159 case ARM64vecu_FRECPE64x2:
5160 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
5161 break;
5162 case ARM64vecu_FRECPE32x4:
5163 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
5164 break;
5165 case ARM64vecu_FRSQRTE64x2:
5166 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
5167 break;
5168 case ARM64vecu_FRSQRTE32x4:
5169 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
5170 break;
5171 case ARM64vecu_FSQRT64x2:
5172 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
5173 break;
5174 case ARM64vecu_FSQRT32x4:
5175 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
5176 break;
5177 default:
5178 goto bad;
5179 }
5180 goto done;
5181 }
5182 case ARM64in_VNarrowV: {
5183 /* 31 23 21 15 9 4
5184 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
5185 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
5186 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
5188 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
5189 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
5190 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
5192 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
5193 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
5194 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
5196 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
5197 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
5198 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
5199 */
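/* NB (editorial addition): dszBlg2 is log2 of the destination lane width
   in bytes (0 -> .8b, 1 -> .4h, 2 -> .2s).  Bits 2:1 of the 8-bit field
   below are the size bits shown in the table above, so OR-ing in
   (dszBlg2 << 1) selects the narrowing width; the base value X01110001
   is the 8h -> 8b form. */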
5200 UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
5201 UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
5202 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
5203 vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
5204 switch (i->ARM64in.VNarrowV.op) {
5205 case ARM64vecna_XTN:
5206 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5207 X00001, X001010, vN, vD);
5208 goto done;
5209 case ARM64vecna_SQXTUN:
5210 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5211 X00001, X001010, vN, vD);
5212 goto done;
5213 case ARM64vecna_SQXTN:
5214 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5215 X00001, X010010, vN, vD);
5216 goto done;
5217 case ARM64vecna_UQXTN:
5218 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5219 X00001, X010010, vN, vD);
5220 goto done;
5221 default:
5222 break;
5223 }
5224 goto bad;
5225 }
5226 case ARM64in_VShiftImmV: {
5227 /*
5228 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5229 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5231 001 011110 immh immb 100101 n d UQSHRN ,,#sh
5232 000 011110 immh immb 100101 n d SQSHRN ,,#sh
5233 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
5235 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
5236 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
5237 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
5239 where immh:immb
5240 = case T of
5241 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5242 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
5243 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
5244 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
5246 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5248 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
5249 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
5250 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
5252 where immh:immb
5253 = case T of
5254 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5255 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
5256 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
5257 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
5258 */
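/* NB (editorial addition): a worked instance of the immh:immb rules above.
   For a 4s right shift by #3, immh:immb encodes 32-3 = 29 = 0b11101,
   giving 0111:101 -- exactly X0100000 | (32-sh) as computed at right32x4
   below.  For a 4s left shift by #3, immh:immb encodes sh itself,
   X0100000 | 3 = 0100:011, as at left32x4. */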
5259 UInt vD = qregEnc(i->ARM64in.VShiftImmV.dst);
5260 UInt vN = qregEnc(i->ARM64in.VShiftImmV.src);
5261 UInt sh = i->ARM64in.VShiftImmV.amt;
5262 UInt tmpl = 0; /* invalid */
5264 const UInt tmpl_USHR
5265 = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
5266 const UInt tmpl_SSHR
5267 = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
5269 const UInt tmpl_UQSHRN
5270 = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
5271 const UInt tmpl_SQSHRN
5272 = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
5273 const UInt tmpl_SQSHRUN
5274 = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
5276 const UInt tmpl_UQRSHRN
5277 = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
5278 const UInt tmpl_SQRSHRN
5279 = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
5280 const UInt tmpl_SQRSHRUN
5281 = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
5283 const UInt tmpl_SHL
5284 = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
5286 const UInt tmpl_UQSHL
5287 = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
5288 const UInt tmpl_SQSHL
5289 = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
5290 const UInt tmpl_SQSHLU
5291 = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
5293 switch (i->ARM64in.VShiftImmV.op) {
5294 case ARM64vecshi_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
5295 case ARM64vecshi_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
5296 case ARM64vecshi_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
5297 case ARM64vecshi_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
5298 case ARM64vecshi_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
5299 case ARM64vecshi_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
5300 case ARM64vecshi_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
5301 case ARM64vecshi_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
5302 case ARM64vecshi_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
5303 case ARM64vecshi_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
5304 case ARM64vecshi_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
5305 case ARM64vecshi_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
5306 case ARM64vecshi_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
5307 case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
5308 case ARM64vecshi_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
5309 case ARM64vecshi_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
5310 case ARM64vecshi_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
5311 case ARM64vecshi_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
5312 case ARM64vecshi_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
5313 case ARM64vecshi_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
5314 case ARM64vecshi_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
5315 case ARM64vecshi_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
5316 case ARM64vecshi_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
5317 case ARM64vecshi_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
5318 case ARM64vecshi_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
5319 case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
5320 case ARM64vecshi_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
5321 case ARM64vecshi_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
5322 case ARM64vecshi_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
5323 case ARM64vecshi_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
5324 case ARM64vecshi_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
5325 case ARM64vecshi_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
5326 case ARM64vecshi_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
5327 case ARM64vecshi_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
5328 case ARM64vecshi_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
5329 case ARM64vecshi_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
5330 case ARM64vecshi_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
5331 case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
5332 case ARM64vecshi_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
5333 case ARM64vecshi_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
5334 case ARM64vecshi_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
5335 case ARM64vecshi_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
5337 default: break;
5339 right64x2:
5340 if (sh >= 1 && sh <= 63) {
5341 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
5342 goto done;
5343 }
5344 break;
5345 right32x4:
5346 if (sh >= 1 && sh <= 32) {
5347 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
5348 goto done;
5349 }
5350 break;
5351 right16x8:
5352 if (sh >= 1 && sh <= 16) {
5353 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
5354 goto done;
5355 }
5356 break;
5357 right8x16:
5358 if (sh >= 1 && sh <= 8) {
5359 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
5360 goto done;
5361 }
5362 break;
5364 left64x2:
5365 if (sh >= 0 && sh <= 63) {
5366 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
5367 goto done;
5368 }
5369 break;
5370 left32x4:
5371 if (sh >= 0 && sh <= 31) {
5372 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
5373 goto done;
5374 }
5375 break;
5376 left16x8:
5377 if (sh >= 0 && sh <= 15) {
5378 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
5379 goto done;
5380 }
5381 break;
5382 left8x16:
5383 if (sh >= 0 && sh <= 7) {
5384 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
5385 goto done;
5386 }
5387 break;
5388 }
5389 goto bad;
5390 }
5391 case ARM64in_VExtV: {
5392 /*
5393 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
5394 where imm4 = the shift amount, in bytes,
5395 Vn is low operand, Vm is high operand
5396 */
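/* NB (editorial addition): EXT takes the 32-byte concatenation Vm:Vn
   (Vn in the low half) and writes bytes imm4 .. imm4+15 of it to Vd,
   i.e. a byte-wise right shift of the pair.  imm4 sits in bits 14:11 of
   the encoding, hence the (imm4 << 1) into the 6-bit field below. */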
5397 UInt vD = qregEnc(i->ARM64in.VExtV.dst);
5398 UInt vN = qregEnc(i->ARM64in.VExtV.srcLo);
5399 UInt vM = qregEnc(i->ARM64in.VExtV.srcHi);
5400 UInt imm4 = i->ARM64in.VExtV.amtB;
5401 vassert(imm4 >= 1 && imm4 <= 15);
5402 *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
5403 X000000 | (imm4 << 1), vN, vD);
5404 goto done;
5405 }
5406 case ARM64in_VImmQ: {
5407 UInt rQ = qregEnc(i->ARM64in.VImmQ.rQ);
5408 UShort imm = i->ARM64in.VImmQ.imm;
5409 vassert(rQ < 32);
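/* NB (editorial addition, an interpretation rather than documented fact):
   the 16-bit imm appears to be a per-byte mask, bit k meaning "byte k of
   the 128-bit result is all ones".  Only the masks below, each expressible
   as a single MOVI/MVNI, are handled; anything else falls through to
   "bad". */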
5410 switch (imm) {
5411 case 0x0000:
5412 // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
5413 *p++ = 0x4F000400 | rQ;
5414 goto done;
5415 case 0x0001:
5416 // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
5417 *p++ = 0x2F00E420 | rQ;
5418 goto done;
5419 case 0x0003:
5420 // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
5421 *p++ = 0x2F00E460 | rQ;
5422 goto done;
5423 case 0x000F:
5424 // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
5425 *p++ = 0x2F00E5E0 | rQ;
5426 goto done;
5427 case 0x003F:
5428 // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
5429 *p++ = 0x2F01E7E0 | rQ;
5430 goto done;
5431 case 0x00FF:
5432 // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
5433 *p++ = 0x2F07E7E0 | rQ;
5434 goto done;
5435 case 0xFFFF:
5436 // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
5437 *p++ = 0x6F000400 | rQ;
5438 goto done;
5439 default:
5440 break;
5441 }
5442 goto bad; /* no other handled cases right now */
5443 }
5445 case ARM64in_VDfromX: {
5446 /* INS Vd.D[0], rX
5447 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5448 This isn't wonderful, in the sense that the upper half of
5449 the vector register stays unchanged and thus the insn is
5450 data dependent on its output register. */
5451 UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
5452 UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
5453 vassert(xx < 31);
5454 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5455 goto done;
5456 }
5458 case ARM64in_VQfromX: {
5459 /* FMOV D, X
5460 1001 1110 0110 0111 0000 00 nn dd FMOV Vd.D[0], Xn
5461 I think this zeroes out the top half of the destination, which
5462 is what we need. TODO: can we do VDfromX and VQfromXX better? */
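/* NB (editorial addition): architecturally, FMOV Dd, Xn writes the whole
   vector register and zeroes bits 127:64, so the "I think" above does
   hold -- the top half of the destination is cleared. */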
5463 UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
5464 UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
5465 vassert(xx < 31);
5466 *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5467 goto done;
5468 }
5470 case ARM64in_VQfromXX: {
5471 /* What we really generate is a two insn sequence:
5472 INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
5473 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5474 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
5475 */
5476 UInt qq = qregEnc(i->ARM64in.VQfromXX.rQ);
5477 UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
5478 UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
5479 vassert(xhi < 31 && xlo < 31);
5480 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
5481 *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
5482 goto done;
5483 }
5485 case ARM64in_VXfromQ: {
5486 /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0]
5487 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1]
5488 */
5489 UInt dd = iregEnc(i->ARM64in.VXfromQ.rX);
5490 UInt nn = qregEnc(i->ARM64in.VXfromQ.rQ);
5491 UInt laneNo = i->ARM64in.VXfromQ.laneNo;
5492 vassert(dd < 31);
5493 vassert(laneNo < 2);
5494 *p++ = X_3_8_5_6_5_5(X010, X01110000,
5495 laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
5496 goto done;
5497 }
5499 case ARM64in_VXfromDorS: {
5500 /* 000 11110001 00110 000000 n d FMOV Wd, Sn
5501 100 11110011 00110 000000 n d FMOV Xd, Dn
5502 */
5503 UInt dd = iregEnc(i->ARM64in.VXfromDorS.rX);
5504 UInt nn = dregEnc(i->ARM64in.VXfromDorS.rDorS);
5505 Bool fromD = i->ARM64in.VXfromDorS.fromD;
5506 vassert(dd < 31);
5507 *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
5508 fromD ? X11110011 : X11110001,
5509 X00110, X000000, nn, dd);
5510 goto done;
5511 }
5513 case ARM64in_VMov: {
5514 /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5515 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5516 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
5517 */
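/* NB (editorial addition): the 16-byte form is MOV Vd.16b, Vn.16b, an
   alias of ORR Vd.16b, Vn.16b, Vn.16b -- hence nn appears twice in the
   emitted word below; the 8-byte form is FMOV Dd, Dn. */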
5518 HReg rD = i->ARM64in.VMov.dst;
5519 HReg rN = i->ARM64in.VMov.src;
5520 switch (i->ARM64in.VMov.szB) {
5521 case 16: {
5522 UInt dd = qregEnc(rD);
5523 UInt nn = qregEnc(rN);
5524 *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
5525 goto done;
5526 }
5527 case 8: {
5528 UInt dd = dregEnc(rD);
5529 UInt nn = dregEnc(rN);
5530 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
5531 goto done;
5532 }
5533 default:
5534 break;
5535 }
5536 goto bad;
5537 }
5539 case ARM64in_EvCheck: {
5540 /* The sequence is fixed (canned) except for the two amodes
5541 supplied by the insn. These don't change the length, though.
5542 We generate:
5543 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5544 subs w9, w9, #1
5545 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5546 bpl nofail
5547 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
5548 br x9
5549 nofail:
5550 */
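/* NB (editorial addition): assuming each do_load_or_store helper emits a
   single instruction here (which the 24-byte crosscheck below implies),
   the canned sequence is six 4-byte insns, matching evCheckSzB_ARM64().
   The literal 0x54000065 decodes as B.PL .+12 (imm19 = 3, cond = PL),
   skipping the failure-path ldr/br pair to reach "nofail". */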
5551 UInt* p0 = p;
5552 p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
5553 i->ARM64in.EvCheck.amCounter);
5554 *p++ = 0x71000529; /* subs w9, w9, #1 */
5555 p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
5556 i->ARM64in.EvCheck.amCounter);
5557 *p++ = 0x54000065; /* bpl nofail */
5558 p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
5559 i->ARM64in.EvCheck.amFailAddr);
5560 *p++ = 0xD61F0120; /* br x9 */
5561 /* nofail: */
5563 /* Crosscheck */
5564 vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
5565 goto done;
5566 }
5568 case ARM64in_ProfInc: {
5569 /* We generate:
5570 (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
5571 expectation that a later call to LibVEX_patchProfCtr
5572 will be used to fill in the immediate fields once the
5573 right value is known.)
5574 imm64-exactly4 x9, 0x6555'7555'8555'9566
5575 ldr x8, [x9]
5576 add x8, x8, #1
5577 str x8, [x9]
5578 */
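/* NB (editorial addition): the three fixed words below decode as
   0xF9400128 = ldr x8, [x9], 0x91000508 = add x8, x8, #1, and
   0xF9000128 = str x8, [x9], matching the sketch above.  Only the
   leading 16-byte imm64 sequence is later rewritten by
   patchProfInc_ARM64. */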
5579 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
5580 *p++ = 0xF9400128;
5581 *p++ = 0x91000508;
5582 *p++ = 0xF9000128;
5583 /* Tell the caller .. */
5584 vassert(!(*is_profInc));
5585 *is_profInc = True;
5586 goto done;
5587 }
5589 /* ... */
5590 default:
5591 goto bad;
5592 }
5594 bad:
5595 ppARM64Instr(i);
5596 vpanic("emit_ARM64Instr");
5597 /*NOTREACHED*/
5599 done:
5600 vassert(((UChar*)p) - &buf[0] <= 40);
5601 return ((UChar*)p) - &buf[0];
5602 }
5605 /* How big is an event check? See case for ARM64in_EvCheck in
5606 emit_ARM64Instr just above. That crosschecks what this returns, so
5607 we can tell if we're inconsistent. */
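/* NB (editorial addition): 24 = 6 insns * 4 bytes -- ldr, subs, str, bpl,
   ldr, br -- as laid down by the ARM64in_EvCheck case above. */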
5608 Int evCheckSzB_ARM64 (void)
5609 {
5610 return 24;
5611 }
5614 /* NB: what goes on here has to be very closely coordinated with the
5615 emitInstr case for XDirect, above. */
5616 VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
5617 void* place_to_chain,
5618 const void* disp_cp_chain_me_EXPECTED,
5619 const void* place_to_jump_to )
5620 {
5621 vassert(endness_host == VexEndnessLE);
5623 /* What we're expecting to see is:
5624 movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
5625 movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
5626 movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
5627 movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
5628 blr x9
5630 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5631 D6 3F 01 20
5632 */
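/* NB (editorial addition): 0xD63F0120 is "blr x9" and 0xD61F0120 is
   "br x9".  Chaining therefore rewrites the 16-byte immediate load and
   flips the call into a plain branch; the 5-insn, 20-byte invalidation
   range below covers exactly what changes. */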
5633 UInt* p = (UInt*)place_to_chain;
5634 vassert(0 == (3 & (HWord)p));
5635 vassert(is_imm64_to_ireg_EXACTLY4(
5636 p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
5637 vassert(p[4] == 0xD63F0120);
5639 /* And what we want to change it to is:
5640 movw x9, place_to_jump_to[15:0]
5641 movk x9, place_to_jump_to[31:16], lsl 16
5642 movk x9, place_to_jump_to[47:32], lsl 32
5643 movk x9, place_to_jump_to[63:48], lsl 48
5644 br x9
5646 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5647 D6 1F 01 20
5649 The replacement has the same length as the original.
5650 */
5651 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
5652 p[4] = 0xD61F0120;
5654 VexInvalRange vir = {(HWord)p, 20};
5655 return vir;
5656 }
5659 /* NB: what goes on here has to be very closely coordinated with the
5660 emitInstr case for XDirect, above. */
5661 VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
5662 void* place_to_unchain,
5663 const void* place_to_jump_to_EXPECTED,
5664 const void* disp_cp_chain_me )
5665 {
5666 vassert(endness_host == VexEndnessLE);
5668 /* What we're expecting to see is:
5669 movw x9, place_to_jump_to_EXPECTED[15:0]
5670 movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
5671 movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
5672 movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
5673 br x9
5675 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5676 D6 1F 01 20
5677 */
5678 UInt* p = (UInt*)place_to_unchain;
5679 vassert(0 == (3 & (HWord)p));
5680 vassert(is_imm64_to_ireg_EXACTLY4(
5681 p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
5682 vassert(p[4] == 0xD61F0120);
5684 /* And what we want to change it to is:
5685 movw x9, disp_cp_chain_me_to[15:0]
5686 movk x9, disp_cp_chain_me_to[31:16], lsl 16
5687 movk x9, disp_cp_chain_me_to[47:32], lsl 32
5688 movk x9, disp_cp_chain_me_to[63:48], lsl 48
5689 blr x9
5691 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5692 D6 3F 01 20
5693 */
5694 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
5695 p[4] = 0xD63F0120;
5697 VexInvalRange vir = {(HWord)p, 20};
5698 return vir;
5699 }
5702 /* Patch the counter address into a profile inc point, as previously
5703 created by the ARM64in_ProfInc case for emit_ARM64Instr. */
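/* NB (editorial addition): only the 4-insn movw/movk skeleton laid down
   by imm64_to_ireg_EXACTLY4 is rewritten here, hence the 4*4 = 16 byte
   invalidation range; the following ldr/add/str words are asserted on
   but left untouched. */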
5704 VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
5705 void* place_to_patch,
5706 const ULong* location_of_counter )
5707 {
5708 vassert(sizeof(ULong*) == 8);
5709 vassert(endness_host == VexEndnessLE);
5710 UInt* p = (UInt*)place_to_patch;
5711 vassert(0 == (3 & (HWord)p));
5712 vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
5713 vassert(p[4] == 0xF9400128);
5714 vassert(p[5] == 0x91000508);
5715 vassert(p[6] == 0xF9000128);
5716 imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
5717 VexInvalRange vir = {(HWord)p, 4*4};
5718 return vir;
5719 }
5721 /*---------------------------------------------------------------*/
5722 /*--- end host_arm64_defs.c ---*/
5723 /*---------------------------------------------------------------*/