Bug 418702 - ARMv8.1 Paired register compare-and-swap instructions are not supported.
[valgrind.git] / VEX / priv / guest_arm64_toIR.c
1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2013-2017 OpenWorks
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 /* KNOWN LIMITATIONS 2014-Nov-16
32 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
34 Also FP comparison "unordered" .. is implemented as normal FP
35 comparison.
37 Both should be fixed. They behave incorrectly in the presence of
38 NaNs.
40 FMULX is treated the same as FMUL. That's also not correct.
42 * Floating multiply-add (etc) insns. Are split into a multiply and
43 an add, and so suffer double rounding and hence sometimes the
44 least significant mantissa bit is incorrect. Fix: use the IR
45 multiply-add IROps instead (a sketch follows this comment).
47 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
48 handling for the "ties" case. FRINTX might be dubious too.
50 * Ditto FCVTXN. No idea what "round to odd" means. This implementation
51 just rounds to nearest.
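/* Illustrative sketch (not from the decoder itself): the double-rounding
   issue noted above could be avoided by emitting VEX's fused multiply-add
   ops.  Assuming Iop_MAddF64 computes (arg2 * arg3) + arg4 under the
   rounding mode given by arg1, an FMADD Dd, Dn, Dm, Da
   (Dd = Da + Dn * Dm) could be translated roughly as

      IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
      putQRegLO(dd,
                IRExpr_Qop(Iop_MAddF64, rm,
                           getQRegLO(dn, Ity_F64),
                           getQRegLO(dm, Ity_F64),
                           getQRegLO(da, Ity_F64)));

   'dd', 'dn', 'dm' and 'da' stand for the decoded register numbers; the
   helpers used here are the ones defined further down in this file. */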
54 /* "Special" instructions.
56 This instruction decoder can decode four special instructions
57 which mean nothing natively (are no-ops as far as regs/mem are
58 concerned) but have meaning for supporting Valgrind. A special
59 instruction is flagged by a 16-byte preamble:
61 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
62 (ror x12, x12, #3; ror x12, x12, #13
63 ror x12, x12, #51; ror x12, x12, #61)
65 Following that, one of the following 4 is allowed
66 (standard interpretation in parentheses):
68 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
69 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
70 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
71 AA090129 (orr x9,x9,x9) IR injection
73 Any other bytes following the 16-byte preamble are illegal and
74 constitute a failure in instruction decoding. This all assumes
75 that the preamble will never occur except in specific code
76 fragments designed for Valgrind to catch.
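/* Illustrative sketch of the detection implied above (not a quote of the
   actual decoder): the preamble check amounts to comparing four
   little-endian words against the constants listed, roughly

      UInt w0 = getUIntLittleEndianly(code + 0);
      UInt w1 = getUIntLittleEndianly(code + 4);
      UInt w2 = getUIntLittleEndianly(code + 8);
      UInt w3 = getUIntLittleEndianly(code + 12);
      if (w0 == 0x93CC0D8C && w1 == 0x93CC358C
          && w2 == 0x93CCCD8C && w3 == 0x93CCF58C) {
         UInt w4 = getUIntLittleEndianly(code + 16);
         if (w4 == 0xAA0A014A) { .. X3 = client_request(X4) .. }
         else if (w4 == 0xAA0B016B) { .. X3 = guest_NRADDR .. }
         else if (w4 == 0xAA0C018C) { .. branch-and-link-to-noredir X8 .. }
         else if (w4 == 0xAA090129) { .. IR injection .. }
         else { .. decoding failure .. }
      }

   where 'code' points at the guest instruction bytes and
   getUIntLittleEndianly is defined later in this file. */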
79 /* Translates ARM64 code to IR. */
81 #include "libvex_basictypes.h"
82 #include "libvex_ir.h"
83 #include "libvex.h"
84 #include "libvex_guest_arm64.h"
86 #include "main_util.h"
87 #include "main_globals.h"
88 #include "guest_generic_bb_to_IR.h"
89 #include "guest_arm64_defs.h"
92 /*------------------------------------------------------------*/
93 /*--- Globals ---*/
94 /*------------------------------------------------------------*/
96 /* These are set at the start of the translation of an instruction, so
97 that we don't have to pass them around endlessly. CONST means does
98 not change during translation of the instruction.
101 /* CONST: what is the host's endianness? We need to know this in
102 order to do sub-register accesses to the SIMD/FP registers
103 correctly. */
104 static VexEndness host_endness;
106 /* CONST: The guest address for the instruction currently being
107 translated. */
108 static Addr64 guest_PC_curr_instr;
110 /* MOD: The IRSB* into which we're generating code. */
111 static IRSB* irsb;
114 /*------------------------------------------------------------*/
115 /*--- Debugging output ---*/
116 /*------------------------------------------------------------*/
118 #define DIP(format, args...) \
119 if (vex_traceflags & VEX_TRACE_FE) \
120 vex_printf(format, ## args)
122 #define DIS(buf, format, args...) \
123 if (vex_traceflags & VEX_TRACE_FE) \
124 vex_sprintf(buf, format, ## args)
127 /*------------------------------------------------------------*/
128 /*--- Helper bits and pieces for deconstructing the ---*/
129 /*--- arm insn stream. ---*/
130 /*------------------------------------------------------------*/
132 /* Do a little-endian load of a 32-bit word, regardless of the
133 endianness of the underlying host. */
134 static inline UInt getUIntLittleEndianly ( const UChar* p )
136 UInt w = 0;
137 w = (w << 8) | p[3];
138 w = (w << 8) | p[2];
139 w = (w << 8) | p[1];
140 w = (w << 8) | p[0];
141 return w;
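/* For instance, the byte sequence 8C 0D CC 93 yields 0x93CC0D8C, the
   first word of the "special instruction" preamble described above. */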
144 /* Sign extend an N-bit value up to 64 bits, by copying
145 bit N-1 into all higher positions. */
146 static ULong sx_to_64 ( ULong x, UInt n )
148 vassert(n > 1 && n < 64);
149 x <<= (64-n);
150 Long r = (Long)x;
151 r >>= (64-n);
152 return (ULong)r;
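/* Worked examples: sx_to_64(0x80, 8) == 0xFFFFFFFFFFFFFF80, and
   sx_to_64(0x7F, 8) == 0x7F. */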
155 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
156 //ZZ endianness of the underlying host. */
157 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
158 //ZZ {
159 //ZZ UShort w = 0;
160 //ZZ w = (w << 8) | p[1];
161 //ZZ w = (w << 8) | p[0];
162 //ZZ return w;
163 //ZZ }
164 //ZZ
165 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
166 //ZZ vassert(sh >= 0 && sh < 32);
167 //ZZ if (sh == 0)
168 //ZZ return x;
169 //ZZ else
170 //ZZ return (x << (32-sh)) | (x >> sh);
171 //ZZ }
172 //ZZ
173 //ZZ static Int popcount32 ( UInt x )
174 //ZZ {
175 //ZZ Int res = 0, i;
176 //ZZ for (i = 0; i < 32; i++) {
177 //ZZ res += (x & 1);
178 //ZZ x >>= 1;
179 //ZZ }
180 //ZZ return res;
181 //ZZ }
182 //ZZ
183 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
184 //ZZ {
185 //ZZ UInt mask = 1 << ix;
186 //ZZ x &= ~mask;
187 //ZZ x |= ((b << ix) & mask);
188 //ZZ return x;
189 //ZZ }
191 #define BITS2(_b1,_b0) \
192 (((_b1) << 1) | (_b0))
194 #define BITS3(_b2,_b1,_b0) \
195 (((_b2) << 2) | ((_b1) << 1) | (_b0))
197 #define BITS4(_b3,_b2,_b1,_b0) \
198 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
200 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
201 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
202 | BITS4((_b3),(_b2),(_b1),(_b0)))
204 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
205 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
206 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
207 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
208 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
209 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
211 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
212 (((_b8) << 8) \
213 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
215 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
216 (((_b9) << 9) | ((_b8) << 8) \
217 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
219 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
220 (((_b10) << 10) \
221 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
223 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
224 (((_b11) << 11) \
225 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
227 #define X00 BITS2(0,0)
228 #define X01 BITS2(0,1)
229 #define X10 BITS2(1,0)
230 #define X11 BITS2(1,1)
232 // produces _uint[_bMax:_bMin]
233 #define SLICE_UInt(_uint,_bMax,_bMin) \
234 (( ((UInt)(_uint)) >> (_bMin)) \
235 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
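// Example: SLICE_UInt(0xABCD, 11, 4) == 0xBC (bits 11..4 of 0xABCD).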
238 /*------------------------------------------------------------*/
239 /*--- Helper bits and pieces for creating IR fragments. ---*/
240 /*------------------------------------------------------------*/
242 static IRExpr* mkV128 ( UShort w )
244 return IRExpr_Const(IRConst_V128(w));
247 static IRExpr* mkU64 ( ULong i )
249 return IRExpr_Const(IRConst_U64(i));
252 static IRExpr* mkU32 ( UInt i )
254 return IRExpr_Const(IRConst_U32(i));
257 static IRExpr* mkU16 ( UInt i )
259 vassert(i < 65536);
260 return IRExpr_Const(IRConst_U16(i));
263 static IRExpr* mkU8 ( UInt i )
265 vassert(i < 256);
266 return IRExpr_Const(IRConst_U8( (UChar)i ));
269 static IRExpr* mkexpr ( IRTemp tmp )
271 return IRExpr_RdTmp(tmp);
274 static IRExpr* unop ( IROp op, IRExpr* a )
276 return IRExpr_Unop(op, a);
279 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
281 return IRExpr_Binop(op, a1, a2);
284 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
286 return IRExpr_Triop(op, a1, a2, a3);
289 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
291 return IRExpr_Load(Iend_LE, ty, addr);
294 /* Add a statement to the list held by "irsb". */
295 static void stmt ( IRStmt* st )
297 addStmtToIRSB( irsb, st );
300 static void assign ( IRTemp dst, IRExpr* e )
302 stmt( IRStmt_WrTmp(dst, e) );
305 static void storeLE ( IRExpr* addr, IRExpr* data )
307 stmt( IRStmt_Store(Iend_LE, addr, data) );
310 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
311 //ZZ {
312 //ZZ if (guardT == IRTemp_INVALID) {
313 //ZZ /* unconditional */
314 //ZZ storeLE(addr, data);
315 //ZZ } else {
316 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
317 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
318 //ZZ }
319 //ZZ }
320 //ZZ
321 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
322 //ZZ IRExpr* addr, IRExpr* alt,
323 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
324 //ZZ {
325 //ZZ if (guardT == IRTemp_INVALID) {
326 //ZZ /* unconditional */
327 //ZZ IRExpr* loaded = NULL;
328 //ZZ switch (cvt) {
329 //ZZ case ILGop_Ident32:
330 //ZZ loaded = loadLE(Ity_I32, addr); break;
331 //ZZ case ILGop_8Uto32:
332 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
333 //ZZ case ILGop_8Sto32:
334 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
335 //ZZ case ILGop_16Uto32:
336 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
337 //ZZ case ILGop_16Sto32:
338 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
339 //ZZ default:
340 //ZZ vassert(0);
341 //ZZ }
342 //ZZ vassert(loaded != NULL);
343 //ZZ assign(dst, loaded);
344 //ZZ } else {
345 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
346 //ZZ loaded data before putting the data in 'dst'. If the load
347 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
348 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
349 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
350 //ZZ }
351 //ZZ }
353 /* Generate a new temporary of the given type. */
354 static IRTemp newTemp ( IRType ty )
356 vassert(isPlausibleIRType(ty));
357 return newIRTemp( irsb->tyenv, ty );
360 /* This is used in many places, so the brevity is an advantage. */
361 static IRTemp newTempV128(void)
363 return newTemp(Ity_V128);
366 /* Initialise V128 temporaries en masse. */
367 static
368 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
370 vassert(t1 && *t1 == IRTemp_INVALID);
371 vassert(t2 && *t2 == IRTemp_INVALID);
372 *t1 = newTempV128();
373 *t2 = newTempV128();
376 static
377 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
379 vassert(t1 && *t1 == IRTemp_INVALID);
380 vassert(t2 && *t2 == IRTemp_INVALID);
381 vassert(t3 && *t3 == IRTemp_INVALID);
382 *t1 = newTempV128();
383 *t2 = newTempV128();
384 *t3 = newTempV128();
387 static
388 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
390 vassert(t1 && *t1 == IRTemp_INVALID);
391 vassert(t2 && *t2 == IRTemp_INVALID);
392 vassert(t3 && *t3 == IRTemp_INVALID);
393 vassert(t4 && *t4 == IRTemp_INVALID);
394 *t1 = newTempV128();
395 *t2 = newTempV128();
396 *t3 = newTempV128();
397 *t4 = newTempV128();
400 static
401 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
402 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
404 vassert(t1 && *t1 == IRTemp_INVALID);
405 vassert(t2 && *t2 == IRTemp_INVALID);
406 vassert(t3 && *t3 == IRTemp_INVALID);
407 vassert(t4 && *t4 == IRTemp_INVALID);
408 vassert(t5 && *t5 == IRTemp_INVALID);
409 vassert(t6 && *t6 == IRTemp_INVALID);
410 vassert(t7 && *t7 == IRTemp_INVALID);
411 *t1 = newTempV128();
412 *t2 = newTempV128();
413 *t3 = newTempV128();
414 *t4 = newTempV128();
415 *t5 = newTempV128();
416 *t6 = newTempV128();
417 *t7 = newTempV128();
420 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
421 //ZZ IRRoundingMode. */
422 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
423 //ZZ {
424 //ZZ return mkU32(Irrm_NEAREST);
425 //ZZ }
426 //ZZ
427 //ZZ /* Generate an expression for SRC rotated right by ROT. */
428 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
429 //ZZ {
430 //ZZ vassert(rot >= 0 && rot < 32);
431 //ZZ if (rot == 0)
432 //ZZ return mkexpr(src);
433 //ZZ return
434 //ZZ binop(Iop_Or32,
435 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
436 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
437 //ZZ }
438 //ZZ
439 //ZZ static IRExpr* mkU128 ( ULong i )
440 //ZZ {
441 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
442 //ZZ }
443 //ZZ
444 //ZZ /* Generate a 4-aligned version of the given expression if
445 //ZZ the given condition is true. Else return it unchanged. */
446 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
447 //ZZ {
448 //ZZ if (b)
449 //ZZ return binop(Iop_And32, e, mkU32(~3));
450 //ZZ else
451 //ZZ return e;
452 //ZZ }
454 /* Other IR construction helpers. */
455 static IROp mkAND ( IRType ty ) {
456 switch (ty) {
457 case Ity_I32: return Iop_And32;
458 case Ity_I64: return Iop_And64;
459 default: vpanic("mkAND");
463 static IROp mkOR ( IRType ty ) {
464 switch (ty) {
465 case Ity_I32: return Iop_Or32;
466 case Ity_I64: return Iop_Or64;
467 default: vpanic("mkOR");
471 static IROp mkXOR ( IRType ty ) {
472 switch (ty) {
473 case Ity_I32: return Iop_Xor32;
474 case Ity_I64: return Iop_Xor64;
475 default: vpanic("mkXOR");
479 static IROp mkSHL ( IRType ty ) {
480 switch (ty) {
481 case Ity_I32: return Iop_Shl32;
482 case Ity_I64: return Iop_Shl64;
483 default: vpanic("mkSHL");
487 static IROp mkSHR ( IRType ty ) {
488 switch (ty) {
489 case Ity_I32: return Iop_Shr32;
490 case Ity_I64: return Iop_Shr64;
491 default: vpanic("mkSHR");
495 static IROp mkSAR ( IRType ty ) {
496 switch (ty) {
497 case Ity_I32: return Iop_Sar32;
498 case Ity_I64: return Iop_Sar64;
499 default: vpanic("mkSAR");
503 static IROp mkNOT ( IRType ty ) {
504 switch (ty) {
505 case Ity_I32: return Iop_Not32;
506 case Ity_I64: return Iop_Not64;
507 default: vpanic("mkNOT");
511 static IROp mkADD ( IRType ty ) {
512 switch (ty) {
513 case Ity_I32: return Iop_Add32;
514 case Ity_I64: return Iop_Add64;
515 default: vpanic("mkADD");
519 static IROp mkSUB ( IRType ty ) {
520 switch (ty) {
521 case Ity_I32: return Iop_Sub32;
522 case Ity_I64: return Iop_Sub64;
523 default: vpanic("mkSUB");
527 static IROp mkADDF ( IRType ty ) {
528 switch (ty) {
529 case Ity_F32: return Iop_AddF32;
530 case Ity_F64: return Iop_AddF64;
531 default: vpanic("mkADDF");
535 static IROp mkSUBF ( IRType ty ) {
536 switch (ty) {
537 case Ity_F32: return Iop_SubF32;
538 case Ity_F64: return Iop_SubF64;
539 default: vpanic("mkSUBF");
543 static IROp mkMULF ( IRType ty ) {
544 switch (ty) {
545 case Ity_F32: return Iop_MulF32;
546 case Ity_F64: return Iop_MulF64;
547 default: vpanic("mkMULF");
551 static IROp mkDIVF ( IRType ty ) {
552 switch (ty) {
553 case Ity_F32: return Iop_DivF32;
554 case Ity_F64: return Iop_DivF64;
555 default: vpanic("mkDIVF");
559 static IROp mkNEGF ( IRType ty ) {
560 switch (ty) {
561 case Ity_F32: return Iop_NegF32;
562 case Ity_F64: return Iop_NegF64;
563 default: vpanic("mkNEGF");
567 static IROp mkABSF ( IRType ty ) {
568 switch (ty) {
569 case Ity_F32: return Iop_AbsF32;
570 case Ity_F64: return Iop_AbsF64;
571 default: vpanic("mkABSF");
575 static IROp mkSQRTF ( IRType ty ) {
576 switch (ty) {
577 case Ity_F32: return Iop_SqrtF32;
578 case Ity_F64: return Iop_SqrtF64;
579 default: vpanic("mkSQRTF");
583 static IROp mkVecADD ( UInt size ) {
584 const IROp ops[4]
585 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
586 vassert(size < 4);
587 return ops[size];
590 static IROp mkVecQADDU ( UInt size ) {
591 const IROp ops[4]
592 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
593 vassert(size < 4);
594 return ops[size];
597 static IROp mkVecQADDS ( UInt size ) {
598 const IROp ops[4]
599 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
600 vassert(size < 4);
601 return ops[size];
604 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
605 const IROp ops[4]
606 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
607 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
608 vassert(size < 4);
609 return ops[size];
612 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
613 const IROp ops[4]
614 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
615 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
616 vassert(size < 4);
617 return ops[size];
620 static IROp mkVecSUB ( UInt size ) {
621 const IROp ops[4]
622 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
623 vassert(size < 4);
624 return ops[size];
627 static IROp mkVecQSUBU ( UInt size ) {
628 const IROp ops[4]
629 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
630 vassert(size < 4);
631 return ops[size];
634 static IROp mkVecQSUBS ( UInt size ) {
635 const IROp ops[4]
636 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
637 vassert(size < 4);
638 return ops[size];
641 static IROp mkVecSARN ( UInt size ) {
642 const IROp ops[4]
643 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
644 vassert(size < 4);
645 return ops[size];
648 static IROp mkVecSHRN ( UInt size ) {
649 const IROp ops[4]
650 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
651 vassert(size < 4);
652 return ops[size];
655 static IROp mkVecSHLN ( UInt size ) {
656 const IROp ops[4]
657 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
658 vassert(size < 4);
659 return ops[size];
662 static IROp mkVecCATEVENLANES ( UInt size ) {
663 const IROp ops[4]
664 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
665 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
666 vassert(size < 4);
667 return ops[size];
670 static IROp mkVecCATODDLANES ( UInt size ) {
671 const IROp ops[4]
672 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
673 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
674 vassert(size < 4);
675 return ops[size];
678 static IROp mkVecINTERLEAVELO ( UInt size ) {
679 const IROp ops[4]
680 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
681 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
682 vassert(size < 4);
683 return ops[size];
686 static IROp mkVecINTERLEAVEHI ( UInt size ) {
687 const IROp ops[4]
688 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
689 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
690 vassert(size < 4);
691 return ops[size];
694 static IROp mkVecMAXU ( UInt size ) {
695 const IROp ops[4]
696 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
697 vassert(size < 4);
698 return ops[size];
701 static IROp mkVecMAXS ( UInt size ) {
702 const IROp ops[4]
703 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
704 vassert(size < 4);
705 return ops[size];
708 static IROp mkVecMINU ( UInt size ) {
709 const IROp ops[4]
710 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
711 vassert(size < 4);
712 return ops[size];
715 static IROp mkVecMINS ( UInt size ) {
716 const IROp ops[4]
717 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
718 vassert(size < 4);
719 return ops[size];
722 static IROp mkVecMUL ( UInt size ) {
723 const IROp ops[4]
724 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
725 vassert(size < 3);
726 return ops[size];
729 static IROp mkVecMULLU ( UInt sizeNarrow ) {
730 const IROp ops[4]
731 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
732 vassert(sizeNarrow < 3);
733 return ops[sizeNarrow];
736 static IROp mkVecMULLS ( UInt sizeNarrow ) {
737 const IROp ops[4]
738 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
739 vassert(sizeNarrow < 3);
740 return ops[sizeNarrow];
743 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
744 const IROp ops[4]
745 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
746 vassert(sizeNarrow < 3);
747 return ops[sizeNarrow];
750 static IROp mkVecCMPEQ ( UInt size ) {
751 const IROp ops[4]
752 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
753 vassert(size < 4);
754 return ops[size];
757 static IROp mkVecCMPGTU ( UInt size ) {
758 const IROp ops[4]
759 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
760 vassert(size < 4);
761 return ops[size];
764 static IROp mkVecCMPGTS ( UInt size ) {
765 const IROp ops[4]
766 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
767 vassert(size < 4);
768 return ops[size];
771 static IROp mkVecABS ( UInt size ) {
772 const IROp ops[4]
773 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
774 vassert(size < 4);
775 return ops[size];
778 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
779 const IROp ops[4]
780 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
781 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
782 vassert(size < 4);
783 return ops[size];
786 static IRExpr* mkU ( IRType ty, ULong imm ) {
787 switch (ty) {
788 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
789 case Ity_I64: return mkU64(imm);
790 default: vpanic("mkU");
794 static IROp mkVecQDMULHIS ( UInt size ) {
795 const IROp ops[4]
796 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
797 vassert(size < 4);
798 return ops[size];
801 static IROp mkVecQRDMULHIS ( UInt size ) {
802 const IROp ops[4]
803 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
804 vassert(size < 4);
805 return ops[size];
808 static IROp mkVecQANDUQSH ( UInt size ) {
809 const IROp ops[4]
810 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
811 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
812 vassert(size < 4);
813 return ops[size];
816 static IROp mkVecQANDSQSH ( UInt size ) {
817 const IROp ops[4]
818 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
819 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
820 vassert(size < 4);
821 return ops[size];
824 static IROp mkVecQANDUQRSH ( UInt size ) {
825 const IROp ops[4]
826 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
827 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
828 vassert(size < 4);
829 return ops[size];
832 static IROp mkVecQANDSQRSH ( UInt size ) {
833 const IROp ops[4]
834 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
835 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
836 vassert(size < 4);
837 return ops[size];
840 static IROp mkVecSHU ( UInt size ) {
841 const IROp ops[4]
842 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
843 vassert(size < 4);
844 return ops[size];
847 static IROp mkVecSHS ( UInt size ) {
848 const IROp ops[4]
849 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
850 vassert(size < 4);
851 return ops[size];
854 static IROp mkVecRSHU ( UInt size ) {
855 const IROp ops[4]
856 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
857 vassert(size < 4);
858 return ops[size];
861 static IROp mkVecRSHS ( UInt size ) {
862 const IROp ops[4]
863 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
864 vassert(size < 4);
865 return ops[size];
868 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
869 const IROp ops[4]
870 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
871 Iop_NarrowUn64to32x2, Iop_INVALID };
872 vassert(sizeNarrow < 4);
873 return ops[sizeNarrow];
876 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
877 const IROp ops[4]
878 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
879 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
880 vassert(sizeNarrow < 4);
881 return ops[sizeNarrow];
884 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
885 const IROp ops[4]
886 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
887 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
888 vassert(sizeNarrow < 4);
889 return ops[sizeNarrow];
892 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
893 const IROp ops[4]
894 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
895 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
896 vassert(sizeNarrow < 4);
897 return ops[sizeNarrow];
900 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
901 const IROp ops[4]
902 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
903 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
904 vassert(sizeNarrow < 4);
905 return ops[sizeNarrow];
908 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
909 const IROp ops[4]
910 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
911 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
912 vassert(sizeNarrow < 4);
913 return ops[sizeNarrow];
916 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
917 const IROp ops[4]
918 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
919 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
920 vassert(sizeNarrow < 4);
921 return ops[sizeNarrow];
924 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
925 const IROp ops[4]
926 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
927 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
928 vassert(sizeNarrow < 4);
929 return ops[sizeNarrow];
932 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
933 const IROp ops[4]
934 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
935 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
936 vassert(sizeNarrow < 4);
937 return ops[sizeNarrow];
940 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
941 const IROp ops[4]
942 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
943 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
944 vassert(sizeNarrow < 4);
945 return ops[sizeNarrow];
948 static IROp mkVecQSHLNSATUU ( UInt size ) {
949 const IROp ops[4]
950 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
951 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
952 vassert(size < 4);
953 return ops[size];
956 static IROp mkVecQSHLNSATSS ( UInt size ) {
957 const IROp ops[4]
958 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
959 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
960 vassert(size < 4);
961 return ops[size];
964 static IROp mkVecQSHLNSATSU ( UInt size ) {
965 const IROp ops[4]
966 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
967 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
968 vassert(size < 4);
969 return ops[size];
972 static IROp mkVecADDF ( UInt size ) {
973 const IROp ops[4]
974 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
975 vassert(size < 4);
976 return ops[size];
979 static IROp mkVecMAXF ( UInt size ) {
980 const IROp ops[4]
981 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
982 vassert(size < 4);
983 return ops[size];
986 static IROp mkVecMINF ( UInt size ) {
987 const IROp ops[4]
988 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
989 vassert(size < 4);
990 return ops[size];
993 /* Generate IR to create 'arg rotated right by imm', for sane values
994 of 'ty' and 'imm'. */
995 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
997 UInt w = 0;
998 if (ty == Ity_I64) {
999 w = 64;
1000 } else {
1001 vassert(ty == Ity_I32);
1002 w = 32;
1004 vassert(w != 0);
1005 vassert(imm < w);
1006 if (imm == 0) {
1007 return arg;
1009 IRTemp res = newTemp(ty);
1010 assign(res, binop(mkOR(ty),
1011 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1012 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1013 return res;
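/* For example, mathROR(Ity_I32, t, 8) returns a temp assigned
   (t << 24) | (t >> 8), i.e. 't' rotated right by 8 bits. */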
1016 /* Generate IR to set the returned temp to either all-zeroes or
1017 all ones, as a copy of arg<imm>. */
1018 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1020 UInt w = 0;
1021 if (ty == Ity_I64) {
1022 w = 64;
1023 } else {
1024 vassert(ty == Ity_I32);
1025 w = 32;
1027 vassert(w != 0);
1028 vassert(imm < w);
1029 IRTemp res = newTemp(ty);
1030 assign(res, binop(mkSAR(ty),
1031 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1032 mkU8(w - 1)));
1033 return res;
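/* For example, with ty == Ity_I32 and imm == 5, the result is
   0xFFFFFFFF if bit 5 of 'arg' is set and 0 otherwise. */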
1036 /* S-widen 8/16/32/64 bit int expr to 64. */
1037 static IRExpr* widenSto64 ( IRType srcTy, IRExpr* e )
1039 switch (srcTy) {
1040 case Ity_I64: return e;
1041 case Ity_I32: return unop(Iop_32Sto64, e);
1042 case Ity_I16: return unop(Iop_16Sto64, e);
1043 case Ity_I8: return unop(Iop_8Sto64, e);
1044 default: vpanic("widenSto64(arm64)");
1048 /* U-widen 8/16/32/64 bit int expr to 64. */
1049 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1051 switch (srcTy) {
1052 case Ity_I64: return e;
1053 case Ity_I32: return unop(Iop_32Uto64, e);
1054 case Ity_I16: return unop(Iop_16Uto64, e);
1055 case Ity_I8: return unop(Iop_8Uto64, e);
1056 default: vpanic("widenUto64(arm64)");
1060 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1061 of these combinations make sense. */
1062 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1064 switch (dstTy) {
1065 case Ity_I64: return e;
1066 case Ity_I32: return unop(Iop_64to32, e);
1067 case Ity_I16: return unop(Iop_64to16, e);
1068 case Ity_I8: return unop(Iop_64to8, e);
1069 default: vpanic("narrowFrom64(arm64)");
1074 /*------------------------------------------------------------*/
1075 /*--- Helpers for accessing guest registers. ---*/
1076 /*------------------------------------------------------------*/
1078 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1079 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1080 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1081 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1082 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1083 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1084 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1085 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1086 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1087 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1088 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1089 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1090 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1091 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1092 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1093 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1094 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1095 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1096 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1097 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1098 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1099 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1100 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1101 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1102 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1103 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1104 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1105 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1106 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1107 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1108 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1110 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1111 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1113 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1114 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1115 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1116 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1118 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1119 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1121 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1122 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1123 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1124 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1125 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1126 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1127 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1128 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1129 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1130 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1131 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1132 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1133 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1134 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1135 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1136 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1137 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1138 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1139 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1140 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1141 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1142 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1143 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1144 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1145 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1146 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1147 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1148 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1149 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1150 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1151 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1152 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1154 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1155 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1157 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1158 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1160 #define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
1161 #define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
1162 #define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
1165 /* ---------------- Integer registers ---------------- */
1167 static Int offsetIReg64 ( UInt iregNo )
1169 /* Do we care about endianness here? We do if sub-parts of integer
1170 registers are accessed. */
1171 switch (iregNo) {
1172 case 0: return OFFB_X0;
1173 case 1: return OFFB_X1;
1174 case 2: return OFFB_X2;
1175 case 3: return OFFB_X3;
1176 case 4: return OFFB_X4;
1177 case 5: return OFFB_X5;
1178 case 6: return OFFB_X6;
1179 case 7: return OFFB_X7;
1180 case 8: return OFFB_X8;
1181 case 9: return OFFB_X9;
1182 case 10: return OFFB_X10;
1183 case 11: return OFFB_X11;
1184 case 12: return OFFB_X12;
1185 case 13: return OFFB_X13;
1186 case 14: return OFFB_X14;
1187 case 15: return OFFB_X15;
1188 case 16: return OFFB_X16;
1189 case 17: return OFFB_X17;
1190 case 18: return OFFB_X18;
1191 case 19: return OFFB_X19;
1192 case 20: return OFFB_X20;
1193 case 21: return OFFB_X21;
1194 case 22: return OFFB_X22;
1195 case 23: return OFFB_X23;
1196 case 24: return OFFB_X24;
1197 case 25: return OFFB_X25;
1198 case 26: return OFFB_X26;
1199 case 27: return OFFB_X27;
1200 case 28: return OFFB_X28;
1201 case 29: return OFFB_X29;
1202 case 30: return OFFB_X30;
1203 /* but not 31 */
1204 default: vassert(0);
1208 static Int offsetIReg64orSP ( UInt iregNo )
1210 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1213 static const HChar* nameIReg64orZR ( UInt iregNo )
1215 vassert(iregNo < 32);
1216 static const HChar* names[32]
1217 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1218 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1219 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1220 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1221 return names[iregNo];
1224 static const HChar* nameIReg64orSP ( UInt iregNo )
1226 if (iregNo == 31) {
1227 return "sp";
1229 vassert(iregNo < 31);
1230 return nameIReg64orZR(iregNo);
1233 static IRExpr* getIReg64orSP ( UInt iregNo )
1235 vassert(iregNo < 32);
1236 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1239 static IRExpr* getIReg64orZR ( UInt iregNo )
1241 if (iregNo == 31) {
1242 return mkU64(0);
1244 vassert(iregNo < 31);
1245 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1248 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1250 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1251 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1254 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1256 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1257 if (iregNo == 31) {
1258 return;
1260 vassert(iregNo < 31);
1261 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1264 static const HChar* nameIReg32orZR ( UInt iregNo )
1266 vassert(iregNo < 32);
1267 static const HChar* names[32]
1268 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1269 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1270 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1271 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1272 return names[iregNo];
1275 static const HChar* nameIReg32orSP ( UInt iregNo )
1277 if (iregNo == 31) {
1278 return "wsp";
1280 vassert(iregNo < 31);
1281 return nameIReg32orZR(iregNo);
1284 static IRExpr* getIReg32orSP ( UInt iregNo )
1286 vassert(iregNo < 32);
1287 return unop(Iop_64to32,
1288 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1291 static IRExpr* getIReg32orZR ( UInt iregNo )
1293 if (iregNo == 31) {
1294 return mkU32(0);
1296 vassert(iregNo < 31);
1297 return unop(Iop_64to32,
1298 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1301 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1303 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1304 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1307 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1309 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1310 if (iregNo == 31) {
1311 return;
1313 vassert(iregNo < 31);
1314 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1317 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1319 vassert(is64 == True || is64 == False);
1320 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1323 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1325 vassert(is64 == True || is64 == False);
1326 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1329 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1331 vassert(is64 == True || is64 == False);
1332 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1335 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1337 vassert(is64 == True || is64 == False);
1338 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1341 static void putPC ( IRExpr* e )
1343 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1344 stmt( IRStmt_Put(OFFB_PC, e) );
1348 /* ---------------- Vector (Q) registers ---------------- */
1350 static Int offsetQReg128 ( UInt qregNo )
1352 /* We don't care about endianness at this point. It only becomes
1353 relevant when dealing with sections of these registers.*/
1354 switch (qregNo) {
1355 case 0: return OFFB_Q0;
1356 case 1: return OFFB_Q1;
1357 case 2: return OFFB_Q2;
1358 case 3: return OFFB_Q3;
1359 case 4: return OFFB_Q4;
1360 case 5: return OFFB_Q5;
1361 case 6: return OFFB_Q6;
1362 case 7: return OFFB_Q7;
1363 case 8: return OFFB_Q8;
1364 case 9: return OFFB_Q9;
1365 case 10: return OFFB_Q10;
1366 case 11: return OFFB_Q11;
1367 case 12: return OFFB_Q12;
1368 case 13: return OFFB_Q13;
1369 case 14: return OFFB_Q14;
1370 case 15: return OFFB_Q15;
1371 case 16: return OFFB_Q16;
1372 case 17: return OFFB_Q17;
1373 case 18: return OFFB_Q18;
1374 case 19: return OFFB_Q19;
1375 case 20: return OFFB_Q20;
1376 case 21: return OFFB_Q21;
1377 case 22: return OFFB_Q22;
1378 case 23: return OFFB_Q23;
1379 case 24: return OFFB_Q24;
1380 case 25: return OFFB_Q25;
1381 case 26: return OFFB_Q26;
1382 case 27: return OFFB_Q27;
1383 case 28: return OFFB_Q28;
1384 case 29: return OFFB_Q29;
1385 case 30: return OFFB_Q30;
1386 case 31: return OFFB_Q31;
1387 default: vassert(0);
1391 /* Write to a complete Qreg. */
1392 static void putQReg128 ( UInt qregNo, IRExpr* e )
1394 vassert(qregNo < 32);
1395 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1396 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1399 /* Read a complete Qreg. */
1400 static IRExpr* getQReg128 ( UInt qregNo )
1402 vassert(qregNo < 32);
1403 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1406 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1407 bit sub-parts we can choose either integer or float types, and
1408 choose float on the basis that that is the common use case and so
1409 will give least interference with Put-to-Get forwarding later
1410 on. */
1411 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1413 switch (szB) {
1414 case 1: return Ity_I8;
1415 case 2: return Ity_I16;
1416 case 4: return Ity_I32; //Ity_F32;
1417 case 8: return Ity_F64;
1418 case 16: return Ity_V128;
1419 default: vassert(0);
1423 /* Find the offset of the laneNo'th lane of type laneTy in the given
1424 Qreg. Since the host is little-endian, the least significant lane
1425 has the lowest offset. */
1426 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1428 vassert(host_endness == VexEndnessLE);
1429 Int base = offsetQReg128(qregNo);
1430 /* Since the host is little-endian, the least significant lane
1431 will be at the lowest address. */
1432 /* Restrict this to known types, so as to avoid silently accepting
1433 stupid types. */
1434 UInt laneSzB = 0;
1435 switch (laneTy) {
1436 case Ity_I8: laneSzB = 1; break;
1437 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1438 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1439 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1440 case Ity_V128: laneSzB = 16; break;
1441 default: break;
1443 vassert(laneSzB > 0);
1444 UInt minOff = laneNo * laneSzB;
1445 UInt maxOff = minOff + laneSzB - 1;
1446 vassert(maxOff < 16);
1447 return base + minOff;
1450 /* Put to the least significant lane of a Qreg. */
1451 static void putQRegLO ( UInt qregNo, IRExpr* e )
1453 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1454 Int off = offsetQRegLane(qregNo, ty, 0);
1455 switch (ty) {
1456 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1457 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1458 break;
1459 default:
1460 vassert(0); // Other cases are probably invalid
1462 stmt(IRStmt_Put(off, e));
1465 /* Get from the least significant lane of a Qreg. */
1466 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1468 Int off = offsetQRegLane(qregNo, ty, 0);
1469 switch (ty) {
1470 case Ity_I8:
1471 case Ity_F16: case Ity_I16:
1472 case Ity_I32: case Ity_I64:
1473 case Ity_F32: case Ity_F64: case Ity_V128:
1474 break;
1475 default:
1476 vassert(0); // Other cases are ATC
1478 return IRExpr_Get(off, ty);
1481 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1483 static const HChar* namesQ[32]
1484 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1485 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1486 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1487 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1488 static const HChar* namesD[32]
1489 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1490 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1491 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1492 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1493 static const HChar* namesS[32]
1494 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1495 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1496 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1497 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1498 static const HChar* namesH[32]
1499 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1500 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1501 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1502 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1503 static const HChar* namesB[32]
1504 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1505 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1506 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1507 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1508 vassert(qregNo < 32);
1509 switch (sizeofIRType(laneTy)) {
1510 case 1: return namesB[qregNo];
1511 case 2: return namesH[qregNo];
1512 case 4: return namesS[qregNo];
1513 case 8: return namesD[qregNo];
1514 case 16: return namesQ[qregNo];
1515 default: vassert(0);
1517 /*NOTREACHED*/
1520 static const HChar* nameQReg128 ( UInt qregNo )
1522 return nameQRegLO(qregNo, Ity_V128);
1525 /* Find the offset of the most significant half (8 bytes) of the given
1526 Qreg. This requires knowing the endianness of the host. */
1527 static Int offsetQRegHI64 ( UInt qregNo )
1529 return offsetQRegLane(qregNo, Ity_I64, 1);
1532 static IRExpr* getQRegHI64 ( UInt qregNo )
1534 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1537 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1539 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1540 Int off = offsetQRegHI64(qregNo);
1541 switch (ty) {
1542 case Ity_I64: case Ity_F64:
1543 break;
1544 default:
1545 vassert(0); // Other cases are plain wrong
1547 stmt(IRStmt_Put(off, e));
1550 /* Put to a specified lane of a Qreg. */
1551 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1553 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1554 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1555 switch (laneTy) {
1556 case Ity_F64: case Ity_I64:
1557 case Ity_I32: case Ity_F32:
1558 case Ity_I16: case Ity_F16:
1559 case Ity_I8:
1560 break;
1561 default:
1562 vassert(0); // Other cases are ATC
1564 stmt(IRStmt_Put(off, e));
1567 /* Get from a specified lane of a Qreg. */
1568 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1570 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1571 switch (laneTy) {
1572 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1573 case Ity_F64: case Ity_F32: case Ity_F16:
1574 break;
1575 default:
1576 vassert(0); // Other cases are ATC
1578 return IRExpr_Get(off, laneTy);
1582 //ZZ /* ---------------- Misc registers ---------------- */
1583 //ZZ
1584 //ZZ static void putMiscReg32 ( UInt gsoffset,
1585 //ZZ IRExpr* e, /* :: Ity_I32 */
1586 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1587 //ZZ {
1588 //ZZ switch (gsoffset) {
1589 //ZZ case OFFB_FPSCR: break;
1590 //ZZ case OFFB_QFLAG32: break;
1591 //ZZ case OFFB_GEFLAG0: break;
1592 //ZZ case OFFB_GEFLAG1: break;
1593 //ZZ case OFFB_GEFLAG2: break;
1594 //ZZ case OFFB_GEFLAG3: break;
1595 //ZZ default: vassert(0); /* awaiting more cases */
1596 //ZZ }
1597 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1598 //ZZ
1599 //ZZ if (guardT == IRTemp_INVALID) {
1600 //ZZ /* unconditional write */
1601 //ZZ stmt(IRStmt_Put(gsoffset, e));
1602 //ZZ } else {
1603 //ZZ stmt(IRStmt_Put(
1604 //ZZ gsoffset,
1605 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1606 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1607 //ZZ ));
1608 //ZZ }
1609 //ZZ }
1610 //ZZ
1611 //ZZ static IRTemp get_ITSTATE ( void )
1612 //ZZ {
1613 //ZZ ASSERT_IS_THUMB;
1614 //ZZ IRTemp t = newTemp(Ity_I32);
1615 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1616 //ZZ return t;
1617 //ZZ }
1618 //ZZ
1619 //ZZ static void put_ITSTATE ( IRTemp t )
1620 //ZZ {
1621 //ZZ ASSERT_IS_THUMB;
1622 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1623 //ZZ }
1624 //ZZ
1625 //ZZ static IRTemp get_QFLAG32 ( void )
1626 //ZZ {
1627 //ZZ IRTemp t = newTemp(Ity_I32);
1628 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1629 //ZZ return t;
1630 //ZZ }
1631 //ZZ
1632 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1633 //ZZ {
1634 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1635 //ZZ }
1636 //ZZ
1637 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1638 //ZZ Status Register) to indicate that overflow or saturation occurred.
1639 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1640 //ZZ value to indicate saturation. */
1641 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1642 //ZZ {
1643 //ZZ IRTemp old = get_QFLAG32();
1644 //ZZ IRTemp nyu = newTemp(Ity_I32);
1645 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1646 //ZZ put_QFLAG32(nyu, condT);
1647 //ZZ }
1650 /* ---------------- FPCR stuff ---------------- */
1652 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1653 convert them to IR format. Bind the final result to the
1654 returned temp. */
1655 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1657 /* The ARMvfp encoding for rounding mode bits is:
1658 00 to nearest
1659 01 to +infinity
1660 10 to -infinity
1661 11 to zero
1662 We need to convert that to the IR encoding:
1663 00 to nearest (the default)
1664 10 to +infinity
1665 01 to -infinity
1666 11 to zero
1667 Which can be done by swapping bits 0 and 1.
1668 The rmode bits are at 23:22 in FPCR.
1670 IRTemp armEncd = newTemp(Ity_I32);
1671 IRTemp swapped = newTemp(Ity_I32);
1672 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1673 we don't zero out bits 24 and above, since the assignment to
1674 'swapped' will mask them out anyway. */
1675 assign(armEncd,
1676 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1677 /* Now swap them. */
1678 assign(swapped,
1679 binop(Iop_Or32,
1680 binop(Iop_And32,
1681 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1682 mkU32(2)),
1683 binop(Iop_And32,
1684 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1685 mkU32(1))
1687 return swapped;
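/* Worked example: FPCR[23:22] == 01 ("to +infinity") leaves armEncd with
   low bits 01; ((01 << 1) & 2) | ((01 >> 1) & 1) == 10, which is the IR
   encoding for rounding to +infinity. */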
1691 /*------------------------------------------------------------*/
1692 /*--- Helpers for flag handling and conditional insns ---*/
1693 /*------------------------------------------------------------*/
1695 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1697 switch (cond) {
1698 case ARM64CondEQ: return "eq";
1699 case ARM64CondNE: return "ne";
1700 case ARM64CondCS: return "cs"; // or 'hs'
1701 case ARM64CondCC: return "cc"; // or 'lo'
1702 case ARM64CondMI: return "mi";
1703 case ARM64CondPL: return "pl";
1704 case ARM64CondVS: return "vs";
1705 case ARM64CondVC: return "vc";
1706 case ARM64CondHI: return "hi";
1707 case ARM64CondLS: return "ls";
1708 case ARM64CondGE: return "ge";
1709 case ARM64CondLT: return "lt";
1710 case ARM64CondGT: return "gt";
1711 case ARM64CondLE: return "le";
1712 case ARM64CondAL: return "al";
1713 case ARM64CondNV: return "nv";
1714 default: vpanic("name_ARM64Condcode");
1718 /* and a handy shorthand for it */
1719 static const HChar* nameCC ( ARM64Condcode cond ) {
1720 return nameARM64Condcode(cond);
1724 /* Build IR to calculate some particular condition from stored
1725 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1726 Ity_I64, suitable for narrowing. Although the return type is
1727 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1728 :: Ity_I64 and must denote the condition to compute in
1729 bits 7:4, and be zero everywhere else.
1731 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1733 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1734 /* And 'cond' had better produce a value in which only bits 7:4 are
1735 nonzero. However, obviously we can't assert for that. */
1737 /* So what we're constructing for the first argument is
1738 "(cond << 4) | stored-operation".
1739 However, as per comments above, 'cond' must be supplied
1740 pre-shifted to this function.
1742 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1743 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1744 8 bits of the first argument. */
1745 IRExpr** args
1746 = mkIRExprVec_4(
1747 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1748 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1749 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1750 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1752 IRExpr* call
1753 = mkIRExprCCall(
1754 Ity_I64,
1755 0/*regparm*/,
1756 "arm64g_calculate_condition", &arm64g_calculate_condition,
1757 args
1760 /* Exclude the requested condition, OP and NDEP from definedness
1761 checking. We're only interested in DEP1 and DEP2. */
1762 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1763 return call;
1767 /* Build IR to calculate some particular condition from stored
1768 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1769 Ity_I64, suitable for narrowing. Although the return type is
1770 Ity_I64, the returned value is either 0 or 1.
1772 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1774 /* First arg is "(cond << 4) | stored-operation". This requires that the
1775 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1776 (COND, OP) pair in the lowest 8 bits of the first argument. */
1777 vassert(cond >= 0 && cond <= 15);
1778 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
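/* For instance, mk_arm64g_calculate_condition(ARM64CondEQ) yields an
   Ity_I64 expression that evaluates to 1 exactly when the stored thunk
   implies Z == 1, and to 0 otherwise. */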
1782 /* Build IR to calculate just the carry flag from stored
1783 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1784 Ity_I64. */
1785 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1787 IRExpr** args
1788 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1789 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1790 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1791 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1792 IRExpr* call
1793 = mkIRExprCCall(
1794 Ity_I64,
1795 0/*regparm*/,
1796 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1797 args
1799 /* Exclude OP and NDEP from definedness checking. We're only
1800 interested in DEP1 and DEP2. */
1801 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1802 return call;
1806 //ZZ /* Build IR to calculate just the overflow flag from stored
1807 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1808 //ZZ Ity_I32. */
1809 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1810 //ZZ {
1811 //ZZ IRExpr** args
1812 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1813 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1814 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1815 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1816 //ZZ IRExpr* call
1817 //ZZ = mkIRExprCCall(
1818 //ZZ Ity_I32,
1819 //ZZ 0/*regparm*/,
1820 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1821 //ZZ args
1822 //ZZ );
1823 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1824 //ZZ interested in DEP1 and DEP2. */
1825 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1826 //ZZ return call;
1827 //ZZ }
1830 /* Build IR to calculate N Z C V in bits 31:28 of the
1831 returned word. */
1832 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1834 IRExpr** args
1835 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1836 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1837 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1838 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1839 IRExpr* call
1840 = mkIRExprCCall(
1841 Ity_I64,
1842 0/*regparm*/,
1843 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1844 args
1846 /* Exclude OP and NDEP from definedness checking. We're only
1847 interested in DEP1 and DEP2. */
1848 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1849 return call;
1853 /* Build IR to set the flags thunk, in the most general case. */
1854 static
1855 void setFlags_D1_D2_ND ( UInt cc_op,
1856 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1858 vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1859 vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1860 vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1861 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1862 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1863 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1864 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1865 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1868 /* Build IR to set the flags thunk after ADD or SUB. */
1869 static
1870 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1872 IRTemp argL64 = IRTemp_INVALID;
1873 IRTemp argR64 = IRTemp_INVALID;
1874 IRTemp z64 = newTemp(Ity_I64);
1875 if (is64) {
1876 argL64 = argL;
1877 argR64 = argR;
1878 } else {
1879 argL64 = newTemp(Ity_I64);
1880 argR64 = newTemp(Ity_I64);
1881 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1882 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1884 assign(z64, mkU64(0));
1885 UInt cc_op = ARM64G_CC_OP_NUMBER;
1886 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1887 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1888 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1889 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1890 else { vassert(0); }
1891 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1894 /* Build IR to set the flags thunk after ADC or SBC. */
1895 static
1896 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1897 IRTemp argL, IRTemp argR, IRTemp oldC )
1899 IRTemp argL64 = IRTemp_INVALID;
1900 IRTemp argR64 = IRTemp_INVALID;
1901 IRTemp oldC64 = IRTemp_INVALID;
1902 if (is64) {
1903 argL64 = argL;
1904 argR64 = argR;
1905 oldC64 = oldC;
1906 } else {
1907 argL64 = newTemp(Ity_I64);
1908 argR64 = newTemp(Ity_I64);
1909 oldC64 = newTemp(Ity_I64);
1910 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1911 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1912 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1914 UInt cc_op = ARM64G_CC_OP_NUMBER;
1915 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1916 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1917 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1918 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1919 else { vassert(0); }
1920 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1923 /* Build IR to set the flags thunk after ADD or SUB, if the given
1924 condition evaluates to True at run time. If not, the flags are set
1925 to the specified NZCV value. */
1926 static
1927 void setFlags_ADD_SUB_conditionally (
1928 Bool is64, Bool isSUB,
1929 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1932 /* Generate IR as follows:
1933 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1934 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1935 CC_DEP2 = ITE(cond, argR64, 0)
1936 CC_NDEP = 0
1939 IRTemp z64 = newTemp(Ity_I64);
1940 assign(z64, mkU64(0));
1942 /* Establish the operation and operands for the True case. */
1943 IRTemp t_dep1 = IRTemp_INVALID;
1944 IRTemp t_dep2 = IRTemp_INVALID;
1945 UInt t_op = ARM64G_CC_OP_NUMBER;
1946 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1947 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1948 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1949 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1950 else { vassert(0); }
1951 /* */
1952 if (is64) {
1953 t_dep1 = argL;
1954 t_dep2 = argR;
1955 } else {
1956 t_dep1 = newTemp(Ity_I64);
1957 t_dep2 = newTemp(Ity_I64);
1958 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1959 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1962 /* Establish the operation and operands for the False case. */
1963 IRTemp f_dep1 = newTemp(Ity_I64);
1964 IRTemp f_dep2 = z64;
1965 UInt f_op = ARM64G_CC_OP_COPY;
1966 assign(f_dep1, mkU64(nzcv << 28));
1968 /* Final thunk values */
1969 IRTemp dep1 = newTemp(Ity_I64);
1970 IRTemp dep2 = newTemp(Ity_I64);
1971 IRTemp op = newTemp(Ity_I64);
1973 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1974 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1975 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1977 /* finally .. */
1978 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1979 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1980 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1981 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1984 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1985 static
1986 void setFlags_LOGIC ( Bool is64, IRTemp res )
1988 IRTemp res64 = IRTemp_INVALID;
1989 IRTemp z64 = newTemp(Ity_I64);
1990 UInt cc_op = ARM64G_CC_OP_NUMBER;
1991 if (is64) {
1992 res64 = res;
1993 cc_op = ARM64G_CC_OP_LOGIC64;
1994 } else {
1995 res64 = newTemp(Ity_I64);
1996 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1997 cc_op = ARM64G_CC_OP_LOGIC32;
1999 assign(z64, mkU64(0));
2000 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
2003 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
2004 located in bits 31:28 of the supplied value. */
2005 static
2006 void setFlags_COPY ( IRTemp nzcv_28x0 )
2008 IRTemp z64 = newTemp(Ity_I64);
2009 assign(z64, mkU64(0));
2010 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
2014 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2015 //ZZ sets it at all) */
2016 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2017 //ZZ IRTemp t_dep2,
2018 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2019 //ZZ {
2020 //ZZ IRTemp z32 = newTemp(Ity_I32);
2021 //ZZ assign( z32, mkU32(0) );
2022 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2023 //ZZ }
2024 //ZZ
2025 //ZZ
2026 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2027 //ZZ sets it at all) */
2028 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2029 //ZZ IRTemp t_ndep,
2030 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2031 //ZZ {
2032 //ZZ IRTemp z32 = newTemp(Ity_I32);
2033 //ZZ assign( z32, mkU32(0) );
2034 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2035 //ZZ }
2036 //ZZ
2037 //ZZ
2038 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2039 //ZZ sets them at all) */
2040 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2041 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2042 //ZZ {
2043 //ZZ IRTemp z32 = newTemp(Ity_I32);
2044 //ZZ assign( z32, mkU32(0) );
2045 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2046 //ZZ }
2049 /*------------------------------------------------------------*/
2050 /*--- Misc math helpers ---*/
2051 /*------------------------------------------------------------*/
2053 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
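/* For example, with mask == 0xFF00FF00FF00FF00 and sh == 8, this
   swaps each adjacent pair of bytes in x. */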
2054 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2056 IRTemp maskT = newTemp(Ity_I64);
2057 IRTemp res = newTemp(Ity_I64);
2058 vassert(sh >= 1 && sh <= 63);
2059 assign(maskT, mkU64(mask));
2060 assign( res,
2061 binop(Iop_Or64,
2062 binop(Iop_Shr64,
2063 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2064 mkU8(sh)),
2065 binop(Iop_And64,
2066 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2067 mkexpr(maskT))
2070 return res;
2073 /* Generates byte swaps within 32-bit lanes. */
2074 static IRTemp math_UINTSWAP64 ( IRTemp src )
2076 IRTemp res;
2077 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2078 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2079 return res;
2082 /* Generates byte swaps within 16-bit lanes. */
2083 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2085 IRTemp res;
2086 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2087 return res;
2090 /* Generates a 64-bit byte swap. */
2091 static IRTemp math_BYTESWAP64 ( IRTemp src )
2093 IRTemp res;
2094 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2095 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2096 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2097 return res;
2100 /* Generates a 64-bit bit swap. */
2101 static IRTemp math_BITSWAP64 ( IRTemp src )
2103 IRTemp res;
2104 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2105 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2106 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2107 return math_BYTESWAP64(res);
2110 /* Duplicates the bits at the bottom of the given word to fill the
2111 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2112 except for the bottom bits. */
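/* For example, with srcTy == Ity_I8 and src == 0x4A, the result
   holds 0x4A4A4A4A4A4A4A4A. */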
2113 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2115 if (srcTy == Ity_I8) {
2116 IRTemp t16 = newTemp(Ity_I64);
2117 assign(t16, binop(Iop_Or64, mkexpr(src),
2118 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2119 IRTemp t32 = newTemp(Ity_I64);
2120 assign(t32, binop(Iop_Or64, mkexpr(t16),
2121 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2122 IRTemp t64 = newTemp(Ity_I64);
2123 assign(t64, binop(Iop_Or64, mkexpr(t32),
2124 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2125 return t64;
2127 if (srcTy == Ity_I16) {
2128 IRTemp t32 = newTemp(Ity_I64);
2129 assign(t32, binop(Iop_Or64, mkexpr(src),
2130 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2131 IRTemp t64 = newTemp(Ity_I64);
2132 assign(t64, binop(Iop_Or64, mkexpr(t32),
2133 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2134 return t64;
2136 if (srcTy == Ity_I32) {
2137 IRTemp t64 = newTemp(Ity_I64);
2138 assign(t64, binop(Iop_Or64, mkexpr(src),
2139 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2140 return t64;
2142 if (srcTy == Ity_I64) {
2143 return src;
2145 vassert(0);
2149 /* Duplicates the src element exactly so as to fill a V128 value. */
2150 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2152 IRTemp res = newTempV128();
2153 if (srcTy == Ity_F64) {
2154 IRTemp i64 = newTemp(Ity_I64);
2155 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2156 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2157 return res;
2159 if (srcTy == Ity_F32) {
2160 IRTemp i64a = newTemp(Ity_I64);
2161 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2162 IRTemp i64b = newTemp(Ity_I64);
2163 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2164 mkexpr(i64a)));
2165 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2166 return res;
2168 if (srcTy == Ity_I64) {
2169 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2170 return res;
2172 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2173 IRTemp t1 = newTemp(Ity_I64);
2174 assign(t1, widenUto64(srcTy, mkexpr(src)));
2175 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2176 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2177 return res;
2179 vassert(0);
2183 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2184 zero out the upper half. */
2185 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2187 if (bitQ == 1) return mkexpr(fullWidth);
2188 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2189 vassert(0);
2192 /* The same, but from an expression instead. */
2193 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2195 IRTemp fullWidthT = newTempV128();
2196 assign(fullWidthT, fullWidth);
2197 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2201 /*------------------------------------------------------------*/
2202 /*--- FP comparison helpers ---*/
2203 /*------------------------------------------------------------*/
2205 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2206 as an IRCmpF64Result. Generate code to convert it to an
2207 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2208 Assign a new temp to hold that value, and return the temp. */
2209 static
2210 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2212 IRTemp ix = newTemp(Ity_I64);
2213 IRTemp termL = newTemp(Ity_I64);
2214 IRTemp termR = newTemp(Ity_I64);
2215 IRTemp nzcv = newTemp(Ity_I64);
2216 IRTemp irRes = newTemp(Ity_I64);
2218 /* This is where the fun starts. We have to convert 'irRes' from
2219 an IR-convention return result (IRCmpF64Result) to an
2220 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2221 4 bits of 'nzcv'. */
2222 /* Map compare result from IR to ARM(nzcv) */
2224 FP cmp result | IR | ARM(nzcv)
2225 --------------------------------
2226 UN 0x45 0011
2227 LT 0x01 1000
2228 GT 0x00 0010
2229 EQ 0x40 0110
2231 /* Now since you're probably wondering WTF ..
2233 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2234 places them side by side, giving a number which is 0, 1, 2 or 3.
2236 termL is a sequence cooked up by GNU superopt. It converts ix
2237 into an almost correct NZCV value (incredibly), except
2238 for the case of UN, where it produces 0100 instead of the
2239 required 0011.
2241 termR is therefore a correction term, also computed from ix. It
2242 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2243 the final, correct value, we subtract termR from termL.
2245 Don't take my word for it. There's a test program at the bottom
2246 of guest_arm_toIR.c, to try this out with.
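A quick hand-trace of the four cases, as a sanity check:

   irRes   ix   termL (bin)   termR   termL - termR
   0x45    3    0100          1       0011  (UN)
   0x01    1    1000          0       1000  (LT)
   0x00    0    0010          0       0010  (GT)
   0x40    2    0110          0       0110  (EQ)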
2248 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2250 assign(
2251 ix,
2252 binop(Iop_Or64,
2253 binop(Iop_And64,
2254 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2255 mkU64(3)),
2256 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2258 assign(
2259 termL,
2260 binop(Iop_Add64,
2261 binop(Iop_Shr64,
2262 binop(Iop_Sub64,
2263 binop(Iop_Shl64,
2264 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2265 mkU8(62)),
2266 mkU64(1)),
2267 mkU8(61)),
2268 mkU64(1)));
2270 assign(
2271 termR,
2272 binop(Iop_And64,
2273 binop(Iop_And64,
2274 mkexpr(ix),
2275 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2276 mkU64(1)));
2278 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2279 return nzcv;
2283 /*------------------------------------------------------------*/
2284 /*--- Data processing (immediate) ---*/
2285 /*------------------------------------------------------------*/
2287 /* Helper functions for supporting "DecodeBitMasks" */
2289 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2291 vassert(width > 0 && width <= 64);
2292 vassert(rot >= 0 && rot < width);
2293 if (rot == 0) return x;
2294 ULong res = x >> rot;
2295 res |= (x << (width - rot));
2296 if (width < 64)
2297 res &= ((1ULL << width) - 1);
2298 return res;
2301 static ULong dbm_RepTo64( Int esize, ULong x )
2303 switch (esize) {
2304 case 64:
2305 return x;
2306 case 32:
2307 x &= 0xFFFFFFFF; x |= (x << 32);
2308 return x;
2309 case 16:
2310 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2311 return x;
2312 case 8:
2313 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2314 return x;
2315 case 4:
2316 x &= 0xF; x |= (x << 4); x |= (x << 8);
2317 x |= (x << 16); x |= (x << 32);
2318 return x;
2319 case 2:
2320 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2321 x |= (x << 16); x |= (x << 32);
2322 return x;
2323 default:
2324 break;
2326 vpanic("dbm_RepTo64");
2327 /*NOTREACHED*/
2328 return 0;
2331 static Int dbm_highestSetBit ( ULong x )
2333 Int i;
2334 for (i = 63; i >= 0; i--) {
2335 if (x & (1ULL << i))
2336 return i;
2338 vassert(x == 0);
2339 return -1;
2342 static
2343 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2344 ULong immN, ULong imms, ULong immr, Bool immediate,
2345 UInt M /*32 or 64*/)
2347 vassert(immN < (1ULL << 1));
2348 vassert(imms < (1ULL << 6));
2349 vassert(immr < (1ULL << 6));
2350 vassert(immediate == False || immediate == True);
2351 vassert(M == 32 || M == 64);
2353 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2354 if (len < 1) { /* printf("fail1\n"); */ return False; }
2355 vassert(len <= 6);
2356 vassert(M >= (1 << len));
2358 vassert(len >= 1 && len <= 6);
2359 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2360 (1 << len) - 1;
2361 vassert(levels >= 1 && levels <= 63);
2363 if (immediate && ((imms & levels) == levels)) {
2364 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2365 return False;
2368 ULong S = imms & levels;
2369 ULong R = immr & levels;
2370 Int diff = S - R;
2371 diff &= 63;
2372 Int esize = 1 << len;
2373 vassert(2 <= esize && esize <= 64);
2375 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2376 same below with d. S can be 63 in which case we have an out of
2377 range and hence undefined shift. */
2378 vassert(S >= 0 && S <= 63);
2379 vassert(esize >= (S+1));
2380 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2381 //(1ULL << (S+1)) - 1;
2382 ((1ULL << S) - 1) + (1ULL << S);
2384 Int d = // diff<len-1:0>
2385 diff & ((1 << len)-1);
2386 vassert(esize >= (d+1));
2387 vassert(d >= 0 && d <= 63);
2389 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2390 //(1ULL << (d+1)) - 1;
2391 ((1ULL << d) - 1) + (1ULL << d);
2393 if (esize != 64) vassert(elem_s < (1ULL << esize));
2394 if (esize != 64) vassert(elem_d < (1ULL << esize));
2396 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2397 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2399 return True;
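/* A hand-worked example, for the immediate case with M=32, N=0,
   immr=1, imms=3: len = dbm_highestSetBit(0b111100) = 5, so
   esize = 32 and levels = 31.  Then S = 3, R = 1, elem_s = 0b1111,
   and wmask = dbm_RepTo64(32, dbm_ROR(32, 0xF, 1))
             = 0x8000000780000007.
   diff = 2, so elem_d = 0b111 and tmask = 0x0000000700000007. */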
2403 static
2404 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2405 UInt insn, Bool sigill_diag)
2407 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2409 /* insn[28:23]
2410 10000x PC-rel addressing
2411 10001x Add/subtract (immediate)
2412 100100 Logical (immediate)
2413 100101 Move Wide (immediate)
2414 100110 Bitfield
2415 100111 Extract
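   For example, the word 0x91000421 has insn[28:23] == 100010, i.e.
   Add/subtract (immediate), and is decoded below as
   add x1, x1, #1 (sf=1, op=add, S=0, sh=0, uimm12=1, Rn=1, Rd=1).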
2418 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2419 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2420 Bool is64 = INSN(31,31) == 1;
2421 Bool isSub = INSN(30,30) == 1;
2422 Bool setCC = INSN(29,29) == 1;
2423 UInt sh = INSN(23,22);
2424 UInt uimm12 = INSN(21,10);
2425 UInt nn = INSN(9,5);
2426 UInt dd = INSN(4,0);
2427 const HChar* nm = isSub ? "sub" : "add";
2428 if (sh >= 2) {
2429 /* Invalid; fall through */
2430 } else {
2431 vassert(sh <= 1);
2432 uimm12 <<= (12 * sh);
2433 if (is64) {
2434 IRTemp argL = newTemp(Ity_I64);
2435 IRTemp argR = newTemp(Ity_I64);
2436 IRTemp res = newTemp(Ity_I64);
2437 assign(argL, getIReg64orSP(nn));
2438 assign(argR, mkU64(uimm12));
2439 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2440 mkexpr(argL), mkexpr(argR)));
2441 if (setCC) {
2442 putIReg64orZR(dd, mkexpr(res));
2443 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2444 DIP("%ss %s, %s, 0x%x\n",
2445 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2446 } else {
2447 putIReg64orSP(dd, mkexpr(res));
2448 DIP("%s %s, %s, 0x%x\n",
2449 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2451 } else {
2452 IRTemp argL = newTemp(Ity_I32);
2453 IRTemp argR = newTemp(Ity_I32);
2454 IRTemp res = newTemp(Ity_I32);
2455 assign(argL, getIReg32orSP(nn));
2456 assign(argR, mkU32(uimm12));
2457 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2458 mkexpr(argL), mkexpr(argR)));
2459 if (setCC) {
2460 putIReg32orZR(dd, mkexpr(res));
2461 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2462 DIP("%ss %s, %s, 0x%x\n",
2463 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2464 } else {
2465 putIReg32orSP(dd, mkexpr(res));
2466 DIP("%s %s, %s, 0x%x\n",
2467 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2470 return True;
2474 /* -------------------- ADR/ADRP -------------------- */
2475 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2476 UInt bP = INSN(31,31);
2477 UInt immLo = INSN(30,29);
2478 UInt immHi = INSN(23,5);
2479 UInt rD = INSN(4,0);
2480 ULong uimm = (immHi << 2) | immLo;
2481 ULong simm = sx_to_64(uimm, 21);
2482 ULong val;
2483 if (bP) {
2484 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2485 } else {
2486 val = guest_PC_curr_instr + simm;
2488 putIReg64orZR(rD, mkU64(val));
2489 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2490 return True;
2493 /* -------------------- LOGIC(imm) -------------------- */
2494 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2495 /* 31 30 28 22 21 15 9 4
2496 sf op 100100 N immr imms Rn Rd
2497 op=00: AND Rd|SP, Rn, #imm
2498 op=01: ORR Rd|SP, Rn, #imm
2499 op=10: EOR Rd|SP, Rn, #imm
2500 op=11: ANDS Rd|ZR, Rn, #imm
2502 Bool is64 = INSN(31,31) == 1;
2503 UInt op = INSN(30,29);
2504 UInt N = INSN(22,22);
2505 UInt immR = INSN(21,16);
2506 UInt immS = INSN(15,10);
2507 UInt nn = INSN(9,5);
2508 UInt dd = INSN(4,0);
2509 ULong imm = 0;
2510 Bool ok;
2511 if (N == 1 && !is64)
2512 goto after_logic_imm; /* not allowed; fall through */
2513 ok = dbm_DecodeBitMasks(&imm, NULL,
2514 N, immS, immR, True, is64 ? 64 : 32);
2515 if (!ok)
2516 goto after_logic_imm;
2518 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2519 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2520 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2522 vassert(op < 4);
2523 if (is64) {
2524 IRExpr* argL = getIReg64orZR(nn);
2525 IRExpr* argR = mkU64(imm);
2526 IRTemp res = newTemp(Ity_I64);
2527 assign(res, binop(ops64[op], argL, argR));
2528 if (op < 3) {
2529 putIReg64orSP(dd, mkexpr(res));
2530 DIP("%s %s, %s, 0x%llx\n", names[op],
2531 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2532 } else {
2533 putIReg64orZR(dd, mkexpr(res));
2534 setFlags_LOGIC(True/*is64*/, res);
2535 DIP("%s %s, %s, 0x%llx\n", names[op],
2536 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2538 } else {
2539 IRExpr* argL = getIReg32orZR(nn);
2540 IRExpr* argR = mkU32((UInt)imm);
2541 IRTemp res = newTemp(Ity_I32);
2542 assign(res, binop(ops32[op], argL, argR));
2543 if (op < 3) {
2544 putIReg32orSP(dd, mkexpr(res));
2545 DIP("%s %s, %s, 0x%x\n", names[op],
2546 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2547 } else {
2548 putIReg32orZR(dd, mkexpr(res));
2549 setFlags_LOGIC(False/*!is64*/, res);
2550 DIP("%s %s, %s, 0x%x\n", names[op],
2551 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2554 return True;
2556 after_logic_imm:
2558 /* -------------------- MOV{Z,N,K} -------------------- */
2559 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2560 /* 31 30 28 22 20 4
2561 | | | | | |
2562 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2563 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2564 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2566 Bool is64 = INSN(31,31) == 1;
2567 UInt subopc = INSN(30,29);
2568 UInt hw = INSN(22,21);
2569 UInt imm16 = INSN(20,5);
2570 UInt dd = INSN(4,0);
2571 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2572 /* invalid; fall through */
2573 } else {
2574 ULong imm64 = ((ULong)imm16) << (16 * hw);
2575 if (!is64)
2576 vassert(imm64 < 0x100000000ULL);
2577 switch (subopc) {
2578 case BITS2(1,0): // MOVZ
2579 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2580 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2581 break;
2582 case BITS2(0,0): // MOVN
2583 imm64 = ~imm64;
2584 if (!is64)
2585 imm64 &= 0xFFFFFFFFULL;
2586 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2587 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2588 break;
2589 case BITS2(1,1): // MOVK
2590 /* This is more complex. We are inserting a slice into
2591 the destination register, so we need to have the old
2592 value of it. */
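/* For example, if X0 currently holds 0x1111222233334444, then
   "movk x0, #0xBEEF, lsl 16" leaves X0 == 0x11112222BEEF4444:
   only the selected 16-bit slice is replaced. */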
2593 if (is64) {
2594 IRTemp old = newTemp(Ity_I64);
2595 assign(old, getIReg64orZR(dd));
2596 ULong mask = 0xFFFFULL << (16 * hw);
2597 IRExpr* res
2598 = binop(Iop_Or64,
2599 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2600 mkU64(imm64));
2601 putIReg64orZR(dd, res);
2602 DIP("movk %s, 0x%x, lsl %u\n",
2603 nameIReg64orZR(dd), imm16, 16*hw);
2604 } else {
2605 IRTemp old = newTemp(Ity_I32);
2606 assign(old, getIReg32orZR(dd));
2607 vassert(hw <= 1);
2608 UInt mask = ((UInt)0xFFFF) << (16 * hw);
2609 IRExpr* res
2610 = binop(Iop_Or32,
2611 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2612 mkU32((UInt)imm64));
2613 putIReg32orZR(dd, res);
2614 DIP("movk %s, 0x%x, lsl %u\n",
2615 nameIReg32orZR(dd), imm16, 16*hw);
2617 break;
2618 default:
2619 vassert(0);
2621 return True;
2625 /* -------------------- {U,S,}BFM -------------------- */
2626 /* 30 28 22 21 15 9 4
2628 sf 10 100110 N immr imms nn dd
2629 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2630 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2632 sf 00 100110 N immr imms nn dd
2633 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2634 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2636 sf 01 100110 N immr imms nn dd
2637 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2638 BFM Xd, Xn, #immr, #imms when sf=1, N=1
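   As a worked example of the wmask/tmask machinery below: for
   UBFM Xd, Xn, #8, #15 (the UBFX form that extracts byte 1),
   DecodeBitMasks gives wmask = 0xFF000000000000FF and tmask = 0xFF.
   Since UBFM starts from zero and does not sign-extend, dst and top
   are zero, so res = (ROR(Xn, 8) & wmask) & tmask = (Xn >> 8) & 0xFF.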
2640 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2641 UInt sf = INSN(31,31);
2642 UInt opc = INSN(30,29);
2643 UInt N = INSN(22,22);
2644 UInt immR = INSN(21,16);
2645 UInt immS = INSN(15,10);
2646 UInt nn = INSN(9,5);
2647 UInt dd = INSN(4,0);
2648 Bool inZero = False;
2649 Bool extend = False;
2650 const HChar* nm = "???";
2651 /* skip invalid combinations */
2652 switch (opc) {
2653 case BITS2(0,0):
2654 inZero = True; extend = True; nm = "sbfm"; break;
2655 case BITS2(0,1):
2656 inZero = False; extend = False; nm = "bfm"; break;
2657 case BITS2(1,0):
2658 inZero = True; extend = False; nm = "ubfm"; break;
2659 case BITS2(1,1):
2660 goto after_bfm; /* invalid */
2661 default:
2662 vassert(0);
2664 if (sf == 1 && N != 1) goto after_bfm;
2665 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2666 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2667 ULong wmask = 0, tmask = 0;
2668 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2669 N, immS, immR, False, sf == 1 ? 64 : 32);
2670 if (!ok) goto after_bfm; /* hmmm */
2672 Bool is64 = sf == 1;
2673 IRType ty = is64 ? Ity_I64 : Ity_I32;
2675 IRTemp dst = newTemp(ty);
2676 IRTemp src = newTemp(ty);
2677 IRTemp bot = newTemp(ty);
2678 IRTemp top = newTemp(ty);
2679 IRTemp res = newTemp(ty);
2680 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2681 assign(src, getIRegOrZR(is64, nn));
2682 /* perform bitfield move on low bits */
2683 assign(bot, binop(mkOR(ty),
2684 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2685 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2686 mkU(ty, wmask))));
2687 /* determine extension bits (sign, zero or dest register) */
2688 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2689 /* combine extension bits and result bits */
2690 assign(res, binop(mkOR(ty),
2691 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2692 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2693 putIRegOrZR(is64, dd, mkexpr(res));
2694 DIP("%s %s, %s, immR=%u, immS=%u\n",
2695 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2696 return True;
2698 after_bfm:
2700 /* ---------------------- EXTR ---------------------- */
2701 /* 30 28 22 20 15 9 4
2702 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2703 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2705 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2706 Bool is64 = INSN(31,31) == 1;
2707 UInt mm = INSN(20,16);
2708 UInt imm6 = INSN(15,10);
2709 UInt nn = INSN(9,5);
2710 UInt dd = INSN(4,0);
2711 Bool valid = True;
2712 if (INSN(31,31) != INSN(22,22))
2713 valid = False;
2714 if (!is64 && imm6 >= 32)
2715 valid = False;
2716 if (!valid) goto after_extr;
2717 IRType ty = is64 ? Ity_I64 : Ity_I32;
2718 IRTemp srcHi = newTemp(ty);
2719 IRTemp srcLo = newTemp(ty);
2720 IRTemp res = newTemp(ty);
2721 assign(srcHi, getIRegOrZR(is64, nn));
2722 assign(srcLo, getIRegOrZR(is64, mm));
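/* imm6 == 0 has to be special-cased: the general formula below
   would shift srcHi left by szBits, which is out of range for the
   IR shift ops. */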
2723 if (imm6 == 0) {
2724 assign(res, mkexpr(srcLo));
2725 } else {
2726 UInt szBits = 8 * sizeofIRType(ty);
2727 vassert(imm6 > 0 && imm6 < szBits);
2728 assign(res, binop(mkOR(ty),
2729 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2730 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2732 putIRegOrZR(is64, dd, mkexpr(res));
2733 DIP("extr %s, %s, %s, #%u\n",
2734 nameIRegOrZR(is64,dd),
2735 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2736 return True;
2738 after_extr:
2740 if (sigill_diag) {
2741 vex_printf("ARM64 front end: data_processing_immediate\n");
2743 return False;
2744 # undef INSN
2748 /*------------------------------------------------------------*/
2749 /*--- Data processing (register) instructions ---*/
2750 /*------------------------------------------------------------*/
2752 static const HChar* nameSH ( UInt sh ) {
2753 switch (sh) {
2754 case 0: return "lsl";
2755 case 1: return "lsr";
2756 case 2: return "asr";
2757 case 3: return "ror";
2758 default: vassert(0);
2762 /* Generate IR to get a register value, possibly shifted by an
2763 immediate. Returns either a 32- or 64-bit temporary holding the
2764 result. After the shift, the value can optionally be NOT-ed
2765 too.
2767 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2768 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2769 isn't allowed, but it's the job of the caller to check that.
2771 static IRTemp getShiftedIRegOrZR ( Bool is64,
2772 UInt sh_how, UInt sh_amt, UInt regNo,
2773 Bool invert )
2775 vassert(sh_how < 4);
2776 vassert(sh_amt < (is64 ? 64 : 32));
2777 IRType ty = is64 ? Ity_I64 : Ity_I32;
2778 IRTemp t0 = newTemp(ty);
2779 assign(t0, getIRegOrZR(is64, regNo));
2780 IRTemp t1 = newTemp(ty);
2781 switch (sh_how) {
2782 case BITS2(0,0):
2783 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2784 break;
2785 case BITS2(0,1):
2786 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2787 break;
2788 case BITS2(1,0):
2789 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2790 break;
2791 case BITS2(1,1):
2792 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2793 break;
2794 default:
2795 vassert(0);
2797 if (invert) {
2798 IRTemp t2 = newTemp(ty);
2799 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2800 return t2;
2801 } else {
2802 return t1;
2807 static
2808 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2809 UInt insn, Bool sigill_diag)
2811 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2813 /* ------------------- ADD/SUB(reg) ------------------- */
2814 /* x==0 => 32 bit op x==1 => 64 bit op
2815 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2817 31 30 29 28 23 21 20 15 9 4
2818 | | | | | | | | | |
2819 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2820 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2821 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2822 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2824 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2825 UInt bX = INSN(31,31);
2826 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2827 UInt bS = INSN(29, 29); /* set flags? */
2828 UInt sh = INSN(23,22);
2829 UInt rM = INSN(20,16);
2830 UInt imm6 = INSN(15,10);
2831 UInt rN = INSN(9,5);
2832 UInt rD = INSN(4,0);
2833 Bool isSUB = bOP == 1;
2834 Bool is64 = bX == 1;
2835 IRType ty = is64 ? Ity_I64 : Ity_I32;
2836 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2837 /* invalid; fall through */
2838 } else {
2839 IRTemp argL = newTemp(ty);
2840 assign(argL, getIRegOrZR(is64, rN));
2841 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2842 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2843 IRTemp res = newTemp(ty);
2844 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2845 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2846 if (bS) {
2847 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2849 DIP("%s%s %s, %s, %s, %s #%u\n",
2850 bOP ? "sub" : "add", bS ? "s" : "",
2851 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2852 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2853 return True;
2857 /* ------------------- ADC/SBC(reg) ------------------- */
2858 /* x==0 => 32 bit op x==1 => 64 bit op
2860 31 30 29 28 23 21 20 15 9 4
2861 | | | | | | | | | |
2862 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2863 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2864 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2865 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2868 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2869 UInt bX = INSN(31,31);
2870 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2871 UInt bS = INSN(29,29); /* set flags */
2872 UInt rM = INSN(20,16);
2873 UInt rN = INSN(9,5);
2874 UInt rD = INSN(4,0);
2876 Bool isSUB = bOP == 1;
2877 Bool is64 = bX == 1;
2878 IRType ty = is64 ? Ity_I64 : Ity_I32;
2880 IRTemp oldC = newTemp(ty);
2881 assign(oldC,
2882 is64 ? mk_arm64g_calculate_flag_c()
2883 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2885 IRTemp argL = newTemp(ty);
2886 assign(argL, getIRegOrZR(is64, rN));
2887 IRTemp argR = newTemp(ty);
2888 assign(argR, getIRegOrZR(is64, rM));
2890 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2891 IRTemp res = newTemp(ty);
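/* ARM semantics: ADC computes Rn + Rm + C, while SBC computes
   Rn - Rm - NOT(C); hence the (oldC XOR 1) term in the subtract
   case below. */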
2892 if (isSUB) {
2893 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2894 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2895 assign(res,
2896 binop(op,
2897 binop(op, mkexpr(argL), mkexpr(argR)),
2898 binop(xorOp, mkexpr(oldC), one)));
2899 } else {
2900 assign(res,
2901 binop(op,
2902 binop(op, mkexpr(argL), mkexpr(argR)),
2903 mkexpr(oldC)));
2906 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2908 if (bS) {
2909 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2912 DIP("%s%s %s, %s, %s\n",
2913 bOP ? "sbc" : "adc", bS ? "s" : "",
2914 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2915 nameIRegOrZR(is64, rM));
2916 return True;
2919 /* -------------------- LOGIC(reg) -------------------- */
2920 /* x==0 => 32 bit op x==1 => 64 bit op
2921 N==0 => inv? is no-op (no inversion)
2922 N==1 => inv? is NOT
2923 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2925 31 30 28 23 21 20 15 9 4
2926 | | | | | | | | |
2927 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2928 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2929 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2930 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2931 With N=1, the names are: BIC ORN EON BICS
2933 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2934 UInt bX = INSN(31,31);
2935 UInt sh = INSN(23,22);
2936 UInt bN = INSN(21,21);
2937 UInt rM = INSN(20,16);
2938 UInt imm6 = INSN(15,10);
2939 UInt rN = INSN(9,5);
2940 UInt rD = INSN(4,0);
2941 Bool is64 = bX == 1;
2942 IRType ty = is64 ? Ity_I64 : Ity_I32;
2943 if (!is64 && imm6 > 31) {
2944 /* invalid; fall through */
2945 } else {
2946 IRTemp argL = newTemp(ty);
2947 assign(argL, getIRegOrZR(is64, rN));
2948 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2949 IROp op = Iop_INVALID;
2950 switch (INSN(30,29)) {
2951 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2952 case BITS2(0,1): op = mkOR(ty); break;
2953 case BITS2(1,0): op = mkXOR(ty); break;
2954 default: vassert(0);
2956 IRTemp res = newTemp(ty);
2957 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2958 if (INSN(30,29) == BITS2(1,1)) {
2959 setFlags_LOGIC(is64, res);
2961 putIRegOrZR(is64, rD, mkexpr(res));
2963 static const HChar* names_op[8]
2964 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2965 vassert(((bN << 2) | INSN(30,29)) < 8);
2966 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2967 /* Special-case the printing of "MOV" */
2968 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2969 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2970 nameIRegOrZR(is64, rM));
2971 } else {
2972 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2973 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2974 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2976 return True;
2980 /* -------------------- {U,S}MULH -------------------- */
2981 /* 31 23 22 20 15 9 4
2982 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2983 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2985 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2986 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2987 Bool isU = INSN(23,23) == 1;
2988 UInt mm = INSN(20,16);
2989 UInt nn = INSN(9,5);
2990 UInt dd = INSN(4,0);
2991 putIReg64orZR(dd, unop(Iop_128HIto64,
2992 binop(isU ? Iop_MullU64 : Iop_MullS64,
2993 getIReg64orZR(nn), getIReg64orZR(mm))));
2994 DIP("%cmulh %s, %s, %s\n",
2995 isU ? 'u' : 's',
2996 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2997 return True;
3000 /* -------------------- M{ADD,SUB} -------------------- */
3001 /* 31 30 20 15 14 9 4
3002 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
3003 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
3005 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
3006 Bool is64 = INSN(31,31) == 1;
3007 UInt mm = INSN(20,16);
3008 Bool isAdd = INSN(15,15) == 0;
3009 UInt aa = INSN(14,10);
3010 UInt nn = INSN(9,5);
3011 UInt dd = INSN(4,0);
3012 if (is64) {
3013 putIReg64orZR(
3014 dd,
3015 binop(isAdd ? Iop_Add64 : Iop_Sub64,
3016 getIReg64orZR(aa),
3017 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3018 } else {
3019 putIReg32orZR(
3020 dd,
3021 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3022 getIReg32orZR(aa),
3023 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3025 DIP("%s %s, %s, %s, %s\n",
3026 isAdd ? "madd" : "msub",
3027 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3028 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3029 return True;
3032 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3033 /* 31 30 28 20 15 11 9 4
3034 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3035 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3036 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3037 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3038 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3040 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3041 Bool is64 = INSN(31,31) == 1;
3042 UInt b30 = INSN(30,30);
3043 UInt mm = INSN(20,16);
3044 UInt cond = INSN(15,12);
3045 UInt b10 = INSN(10,10);
3046 UInt nn = INSN(9,5);
3047 UInt dd = INSN(4,0);
3048 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3049 IRType ty = is64 ? Ity_I64 : Ity_I32;
3050 IRExpr* argL = getIRegOrZR(is64, nn);
3051 IRExpr* argR = getIRegOrZR(is64, mm);
3052 switch (op) {
3053 case BITS2(0,0):
3054 break;
3055 case BITS2(0,1):
3056 argR = binop(mkADD(ty), argR, mkU(ty,1));
3057 break;
3058 case BITS2(1,0):
3059 argR = unop(mkNOT(ty), argR);
3060 break;
3061 case BITS2(1,1):
3062 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3063 break;
3064 default:
3065 vassert(0);
3067 putIRegOrZR(
3068 is64, dd,
3069 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3070 argL, argR)
3072 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3073 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3074 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3075 nameIRegOrZR(is64, mm), nameCC(cond));
3076 return True;
3079 /* -------------- ADD/SUB(extended reg) -------------- */
3080 /* 28 20 15 12 9 4
3081 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3082 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3084 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3085 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3087 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3088 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3090 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3091 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3093 The 'm' operand is extended per opt, thusly:
3095 000 Xm & 0xFF UXTB
3096 001 Xm & 0xFFFF UXTH
3097 010 Xm & (2^32)-1 UXTW
3098 011 Xm UXTX
3100 100 Xm sx from bit 7 SXTB
3101 101 Xm sx from bit 15 SXTH
3102 110 Xm sx from bit 31 SXTW
3103 111 Xm SXTX
3105 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3106 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3107 are the identity operation on Wm.
3109 After extension, the value is shifted left by imm3 bits, which
3110 may only be in the range 0 .. 4 inclusive.
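   For example, ADD X0, X1, W2, UXTW #2 (opt=010, imm3=2) computes
   X0 = X1 + (ZeroExt64(W2) << 2).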
3112 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3113 Bool is64 = INSN(31,31) == 1;
3114 Bool isSub = INSN(30,30) == 1;
3115 Bool setCC = INSN(29,29) == 1;
3116 UInt mm = INSN(20,16);
3117 UInt opt = INSN(15,13);
3118 UInt imm3 = INSN(12,10);
3119 UInt nn = INSN(9,5);
3120 UInt dd = INSN(4,0);
3121 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3122 "sxtb", "sxth", "sxtw", "sxtx" };
3123 /* Do almost the same thing in the 32- and 64-bit cases. */
3124 IRTemp xN = newTemp(Ity_I64);
3125 IRTemp xM = newTemp(Ity_I64);
3126 assign(xN, getIReg64orSP(nn));
3127 assign(xM, getIReg64orZR(mm));
3128 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3129 Int shSX = 0;
3130 /* widen Xm .. */
3131 switch (opt) {
3132 case BITS3(0,0,0): // UXTB
3133 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3134 case BITS3(0,0,1): // UXTH
3135 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3136 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3137 if (is64) {
3138 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3140 break;
3141 case BITS3(0,1,1): // UXTX -- always a noop
3142 break;
3143 case BITS3(1,0,0): // SXTB
3144 shSX = 56; goto sxTo64;
3145 case BITS3(1,0,1): // SXTH
3146 shSX = 48; goto sxTo64;
3147 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3148 if (is64) {
3149 shSX = 32; goto sxTo64;
3151 break;
3152 case BITS3(1,1,1): // SXTX -- always a noop
3153 break;
3154 sxTo64:
3155 vassert(shSX >= 32);
3156 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3157 mkU8(shSX));
3158 break;
3159 default:
3160 vassert(0);
3162 /* and now shift */
3163 IRTemp argL = xN;
3164 IRTemp argR = newTemp(Ity_I64);
3165 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3166 IRTemp res = newTemp(Ity_I64);
3167 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3168 mkexpr(argL), mkexpr(argR)));
3169 if (is64) {
3170 if (setCC) {
3171 putIReg64orZR(dd, mkexpr(res));
3172 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3173 } else {
3174 putIReg64orSP(dd, mkexpr(res));
3176 } else {
3177 if (setCC) {
3178 IRTemp argL32 = newTemp(Ity_I32);
3179 IRTemp argR32 = newTemp(Ity_I32);
3180 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3181 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3182 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3183 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3184 } else {
3185 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3188 DIP("%s%s %s, %s, %s %s lsl %u\n",
3189 isSub ? "sub" : "add", setCC ? "s" : "",
3190 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3191 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3192 nameExt[opt], imm3);
3193 return True;
3196 /* ---------------- CCMP/CCMN(imm) ---------------- */
3197 /* Bizarrely, these appear in the "data processing register"
3198 category, even though they are operations against an
3199 immediate. */
3200 /* 31 29 20 15 11 9 3
3201 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3202 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3204 Operation is:
3205 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3206 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
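   For example, CCMP X1, #4, #8, EQ sets the flags from X1 - 4 when
   the Z flag is currently set, and otherwise forces NZCV = 1000
   (just N).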
3208 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3209 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3210 Bool is64 = INSN(31,31) == 1;
3211 Bool isSUB = INSN(30,30) == 1;
3212 UInt imm5 = INSN(20,16);
3213 UInt cond = INSN(15,12);
3214 UInt nn = INSN(9,5);
3215 UInt nzcv = INSN(3,0);
3217 IRTemp condT = newTemp(Ity_I1);
3218 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3220 IRType ty = is64 ? Ity_I64 : Ity_I32;
3221 IRTemp argL = newTemp(ty);
3222 IRTemp argR = newTemp(ty);
3224 if (is64) {
3225 assign(argL, getIReg64orZR(nn));
3226 assign(argR, mkU64(imm5));
3227 } else {
3228 assign(argL, getIReg32orZR(nn));
3229 assign(argR, mkU32(imm5));
3231 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3233 DIP("ccm%c %s, #%u, #%u, %s\n",
3234 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3235 imm5, nzcv, nameCC(cond));
3236 return True;
3239 /* ---------------- CCMP/CCMN(reg) ---------------- */
3240 /* 31 29 20 15 11 9 3
3241 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3242 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3243 Operation is:
3244 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3245 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3247 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3248 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3249 Bool is64 = INSN(31,31) == 1;
3250 Bool isSUB = INSN(30,30) == 1;
3251 UInt mm = INSN(20,16);
3252 UInt cond = INSN(15,12);
3253 UInt nn = INSN(9,5);
3254 UInt nzcv = INSN(3,0);
3256 IRTemp condT = newTemp(Ity_I1);
3257 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3259 IRType ty = is64 ? Ity_I64 : Ity_I32;
3260 IRTemp argL = newTemp(ty);
3261 IRTemp argR = newTemp(ty);
3263 if (is64) {
3264 assign(argL, getIReg64orZR(nn));
3265 assign(argR, getIReg64orZR(mm));
3266 } else {
3267 assign(argL, getIReg32orZR(nn));
3268 assign(argR, getIReg32orZR(mm));
3270 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3272 DIP("ccm%c %s, %s, #%u, %s\n",
3273 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3274 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3275 return True;
3279 /* -------------- REV/REV16/REV32/RBIT -------------- */
3280 /* 31 30 28 20 15 11 9 4
3282 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3283 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3285 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3286 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3288 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3289 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3291 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3293 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3294 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3295 UInt b31 = INSN(31,31);
3296 UInt opc = INSN(11,10);
3298 UInt ix = 0;
3299 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3300 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3301 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3302 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3303 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3304 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3305 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3306 if (ix >= 1 && ix <= 7) {
3307 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3308 UInt nn = INSN(9,5);
3309 UInt dd = INSN(4,0);
3310 IRTemp src = newTemp(Ity_I64);
3311 IRTemp dst = IRTemp_INVALID;
3312 IRTemp (*math)(IRTemp) = NULL;
3313 switch (ix) {
3314 case 1: case 2: math = math_BYTESWAP64; break;
3315 case 3: case 4: math = math_BITSWAP64; break;
3316 case 5: case 6: math = math_USHORTSWAP64; break;
3317 case 7: math = math_UINTSWAP64; break;
3318 default: vassert(0);
3320 const HChar* names[7]
3321 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3322 const HChar* nm = names[ix-1];
3323 vassert(math);
3324 if (ix == 6) {
3325 /* This has to be special cased, since the logic below doesn't
3326 handle it correctly. */
3327 assign(src, getIReg64orZR(nn));
3328 dst = math(src);
3329 putIReg64orZR(dd,
3330 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3331 } else if (is64) {
3332 assign(src, getIReg64orZR(nn));
3333 dst = math(src);
3334 putIReg64orZR(dd, mkexpr(dst));
3335 } else {
3336 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3337 dst = math(src);
3338 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3340 DIP("%s %s, %s\n", nm,
3341 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3342 return True;
3344 /* else fall through */
3347 /* -------------------- CLZ/CLS -------------------- */
3348 /* 30 28 24 20 15 9 4
3349 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3350 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3352 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3353 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3354 Bool is64 = INSN(31,31) == 1;
3355 Bool isCLS = INSN(10,10) == 1;
3356 UInt nn = INSN(9,5);
3357 UInt dd = INSN(4,0);
3358 IRTemp src = newTemp(Ity_I64);
3359 IRTemp srcZ = newTemp(Ity_I64);
3360 IRTemp dst = newTemp(Ity_I64);
3361 /* Get the argument, widened out to 64 bit */
3362 if (is64) {
3363 assign(src, getIReg64orZR(nn));
3364 } else {
3365 assign(src, binop(Iop_Shl64,
3366 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3368 /* If this is CLS, mash the arg around accordingly */
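/* The mashed value srcZ has bit i == src[i] ^ src[i-1], with bit 0
   forced to zero, so its leading-zero count is the number of bits
   below the sign bit that are copies of it -- exactly what CLS
   returns. */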
3369 if (isCLS) {
3370 IRExpr* one = mkU8(1);
3371 assign(srcZ,
3372 binop(Iop_Xor64,
3373 binop(Iop_Shl64, mkexpr(src), one),
3374 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3375 } else {
3376 assign(srcZ, mkexpr(src));
3378 /* And compute CLZ. */
3379 if (is64) {
3380 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3381 mkU64(isCLS ? 63 : 64),
3382 unop(Iop_Clz64, mkexpr(srcZ))));
3383 putIReg64orZR(dd, mkexpr(dst));
3384 } else {
3385 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3386 mkU64(isCLS ? 31 : 32),
3387 unop(Iop_Clz64, mkexpr(srcZ))));
3388 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3390 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3391 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3392 return True;
3395 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3396 /* 30 28 20 15 11 9 4
3397 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3398 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3399 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3400 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3402 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3403 && INSN(15,12) == BITS4(0,0,1,0)) {
3404 Bool is64 = INSN(31,31) == 1;
3405 UInt mm = INSN(20,16);
3406 UInt op = INSN(11,10);
3407 UInt nn = INSN(9,5);
3408 UInt dd = INSN(4,0);
3409 IRType ty = is64 ? Ity_I64 : Ity_I32;
3410 IRTemp srcL = newTemp(ty);
3411 IRTemp srcR = newTemp(Ity_I64);
3412 IRTemp res = newTemp(ty);
3413 IROp iop = Iop_INVALID;
3414 assign(srcL, getIRegOrZR(is64, nn));
3415 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3416 mkU64(is64 ? 63 : 31)));
3417 if (op < 3) {
3418 // LSLV, LSRV, ASRV
3419 switch (op) {
3420 case BITS2(0,0): iop = mkSHL(ty); break;
3421 case BITS2(0,1): iop = mkSHR(ty); break;
3422 case BITS2(1,0): iop = mkSAR(ty); break;
3423 default: vassert(0);
3425 assign(res, binop(iop, mkexpr(srcL),
3426 unop(Iop_64to8, mkexpr(srcR))));
3427 } else {
3428 // RORV
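// The ITE below special-cases a rotate amount of zero: otherwise
// the left shift would be by the full register width, which the
// IR shift ops do not allow.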
3429 IROp opSHL = mkSHL(ty);
3430 IROp opSHR = mkSHR(ty);
3431 IROp opOR = mkOR(ty);
3432 IRExpr* width = mkU64(is64 ? 64: 32);
3433 assign(
3434 res,
3435 IRExpr_ITE(
3436 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3437 mkexpr(srcL),
3438 binop(opOR,
3439 binop(opSHL,
3440 mkexpr(srcL),
3441 unop(Iop_64to8, binop(Iop_Sub64, width,
3442 mkexpr(srcR)))),
3443 binop(opSHR,
3444 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3447 putIRegOrZR(is64, dd, mkexpr(res));
3448 vassert(op < 4);
3449 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3450 DIP("%s %s, %s, %s\n",
3451 names[op], nameIRegOrZR(is64,dd),
3452 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3453 return True;
3456 /* -------------------- SDIV/UDIV -------------------- */
3457 /* 30 28 20 15 10 9 4
3458 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3459 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3461 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3462 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3463 Bool is64 = INSN(31,31) == 1;
3464 UInt mm = INSN(20,16);
3465 Bool isS = INSN(10,10) == 1;
3466 UInt nn = INSN(9,5);
3467 UInt dd = INSN(4,0);
3468 if (isS) {
3469 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3470 getIRegOrZR(is64, nn),
3471 getIRegOrZR(is64, mm)));
3472 } else {
3473 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3474 getIRegOrZR(is64, nn),
3475 getIRegOrZR(is64, mm)));
3477 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3478 nameIRegOrZR(is64, dd),
3479 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3480 return True;
3483 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3484 /* 31 23 20 15 14 9 4
3485 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3486 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3487 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3488 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3489 with operation
3490 Xd = Xa +/- (Wn *u/s Wm)
3492 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3493 Bool isU = INSN(23,23) == 1;
3494 UInt mm = INSN(20,16);
3495 Bool isAdd = INSN(15,15) == 0;
3496 UInt aa = INSN(14,10);
3497 UInt nn = INSN(9,5);
3498 UInt dd = INSN(4,0);
3499 IRTemp wN = newTemp(Ity_I32);
3500 IRTemp wM = newTemp(Ity_I32);
3501 IRTemp xA = newTemp(Ity_I64);
3502 IRTemp muld = newTemp(Ity_I64);
3503 IRTemp res = newTemp(Ity_I64);
3504 assign(wN, getIReg32orZR(nn));
3505 assign(wM, getIReg32orZR(mm));
3506 assign(xA, getIReg64orZR(aa));
3507 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3508 mkexpr(wN), mkexpr(wM)));
3509 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3510 mkexpr(xA), mkexpr(muld)));
3511 putIReg64orZR(dd, mkexpr(res));
3512 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3513 nameIReg64orZR(dd), nameIReg32orZR(nn),
3514 nameIReg32orZR(mm), nameIReg64orZR(aa));
3515 return True;
3518 /* -------------------- CRC32/CRC32C -------------------- */
3519 /* 31 30 20 15 11 9 4
3520 sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
3521 sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
3523 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3524 && INSN(15,13) == BITS3(0,1,0)) {
3525 UInt bitSF = INSN(31,31);
3526 UInt mm = INSN(20,16);
3527 UInt bitC = INSN(12,12);
3528 UInt sz = INSN(11,10);
3529 UInt nn = INSN(9,5);
3530 UInt dd = INSN(4,0);
3531 vassert(sz >= 0 && sz <= 3);
3532 if ((bitSF == 0 && sz <= BITS2(1,0))
3533 || (bitSF == 1 && sz == BITS2(1,1))) {
3534 UInt ix = (bitC == 1 ? 4 : 0) | sz;
3535 void* helpers[8]
3536 = { &arm64g_calc_crc32b, &arm64g_calc_crc32h,
3537 &arm64g_calc_crc32w, &arm64g_calc_crc32x,
3538 &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
3539 &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
3540 const HChar* hNames[8]
3541 = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
3542 "arm64g_calc_crc32w", "arm64g_calc_crc32x",
3543 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3544 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3545 const HChar* iNames[8]
3546 = { "crc32b", "crc32h", "crc32w", "crc32x",
3547 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3549 IRTemp srcN = newTemp(Ity_I64);
3550 assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
3552 IRTemp srcM = newTemp(Ity_I64);
3553 IRExpr* at64 = getIReg64orZR(mm);
3554 switch (sz) {
3555 case BITS2(0,0):
3556 assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
3557 case BITS2(0,1):
3558 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
3559 case BITS2(1,0):
3560 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
3561 case BITS2(1,1):
3562 assign(srcM, at64); break;
3563 default:
3564 vassert(0);
3567 vassert(ix >= 0 && ix <= 7);
3569 putIReg64orZR(
3570 dd,
3571 unop(Iop_32Uto64,
3572 unop(Iop_64to32,
3573 mkIRExprCCall(Ity_I64, 0/*regparm*/,
3574 hNames[ix], helpers[ix],
3575 mkIRExprVec_2(mkexpr(srcN),
3576 mkexpr(srcM))))));
3578 DIP("%s %s, %s, %s\n", iNames[ix],
3579 nameIReg32orZR(dd),
3580 nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
3581 return True;
3583 /* fall through */
3586 if (sigill_diag) {
3587 vex_printf("ARM64 front end: data_processing_register\n");
3589 return False;
3590 # undef INSN
3594 /*------------------------------------------------------------*/
3595 /*--- Math helpers for vector interleave/deinterleave ---*/
3596 /*------------------------------------------------------------*/
3598 #define EX(_tmp) \
3599 mkexpr(_tmp)
3600 #define SL(_hi128,_lo128,_nbytes) \
3601 ( (_nbytes) == 0 \
3602 ? (_lo128) \
3603 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3604 #define ROR(_v128,_nbytes) \
3605 SL((_v128),(_v128),(_nbytes))
3606 #define ROL(_v128,_nbytes) \
3607 SL((_v128),(_v128),16-(_nbytes))
3608 #define SHR(_v128,_nbytes) \
3609 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3610 #define SHL(_v128,_nbytes) \
3611 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3612 #define ILO64x2(_argL,_argR) \
3613 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3614 #define IHI64x2(_argL,_argR) \
3615 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3616 #define ILO32x4(_argL,_argR) \
3617 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3618 #define IHI32x4(_argL,_argR) \
3619 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3620 #define ILO16x8(_argL,_argR) \
3621 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3622 #define IHI16x8(_argL,_argR) \
3623 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3624 #define ILO8x16(_argL,_argR) \
3625 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3626 #define IHI8x16(_argL,_argR) \
3627 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3628 #define CEV32x4(_argL,_argR) \
3629 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3630 #define COD32x4(_argL,_argR) \
3631 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3632 #define COD16x8(_argL,_argR) \
3633 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3634 #define COD8x16(_argL,_argR) \
3635 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3636 #define CEV8x16(_argL,_argR) \
3637 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3638 #define AND(_arg1,_arg2) \
3639 binop(Iop_AndV128,(_arg1),(_arg2))
3640 #define OR2(_arg1,_arg2) \
3641 binop(Iop_OrV128,(_arg1),(_arg2))
3642 #define OR3(_arg1,_arg2,_arg3) \
3643 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3644 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3645 binop(Iop_OrV128, \
3646 binop(Iop_OrV128,(_arg1),(_arg2)), \
3647 binop(Iop_OrV128,(_arg3),(_arg4)))
3650 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3651 static
3652 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3653 UInt laneSzBlg2, IRTemp u0 )
3655 assign(*i0, mkexpr(u0));
3659 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3660 static
3661 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3662 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3664 /* This is pretty easy, since we have primitives directly to
3665 hand. */
3666 if (laneSzBlg2 == 3) {
3667 // 64x2
3668 // u1 == B1 B0, u0 == A1 A0
3669 // i1 == B1 A1, i0 == B0 A0
3670 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3671 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3672 return;
3674 if (laneSzBlg2 == 2) {
3675 // 32x4
3676 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3677 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3678 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3679 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3680 return;
3682 if (laneSzBlg2 == 1) {
3683 // 16x8
3684 // u1 == B{7..0}, u0 == A{7..0}
3685 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3686 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3687 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3688 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3689 return;
3691 if (laneSzBlg2 == 0) {
3692 // 8x16
3693 // u1 == B{f..0}, u0 == A{f..0}
3694 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3695 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3696 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3697 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3698 return;
3700 /*NOTREACHED*/
3701 vassert(0);
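/* Purely illustrative reference model, not part of the original sources
   and not called anywhere: at byte granularity, the 2-register
   interleave above produces the ST2 memory order "lane k of u0, then
   lane k of u1" for successive k, with the first 16 bytes of that
   stream landing in i0 and the next 16 in i1.  The ST3/ST4 variants
   below do the same with a stride of 3 or 4 registers. */
static inline
void illustrative_ref_interleave2 ( /*OUT*/UChar* dst /* 32 bytes */,
                                    const UChar* srcA /* lanes of u0 */,
                                    const UChar* srcB /* lanes of u1 */,
                                    UInt laneSzB /* 1, 2, 4 or 8 */ )
{
   UInt k, b;
   UInt lanes = 16 / laneSzB;
   for (k = 0; k < lanes; k++) {
      for (b = 0; b < laneSzB; b++) {
         /* dst[0..15] corresponds to i0, dst[16..31] to i1. */
         dst[(2*k + 0) * laneSzB + b] = srcA[k * laneSzB + b];
         dst[(2*k + 1) * laneSzB + b] = srcB[k * laneSzB + b];
      }
   }
}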
3705 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3706 static
3707 void math_INTERLEAVE3_128(
3708 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3709 UInt laneSzBlg2,
3710 IRTemp u0, IRTemp u1, IRTemp u2 )
3712 if (laneSzBlg2 == 3) {
3713 // 64x2
3714 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3715 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3716 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3717 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3718 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3719 return;
3722 if (laneSzBlg2 == 2) {
3723 // 32x4
3724 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3725 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3726 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3727 IRTemp p0 = newTempV128();
3728 IRTemp p1 = newTempV128();
3729 IRTemp p2 = newTempV128();
3730 IRTemp c1100 = newTempV128();
3731 IRTemp c0011 = newTempV128();
3732 IRTemp c0110 = newTempV128();
3733 assign(c1100, mkV128(0xFF00));
3734 assign(c0011, mkV128(0x00FF));
3735 assign(c0110, mkV128(0x0FF0));
3736 // First interleave them at 64x2 granularity,
3737 // generating partial ("p") values.
3738 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3739 // And more shuffling around for the final answer
3740 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3741 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3742 assign(*i1, OR3( SHL(EX(p2),12),
3743 AND(EX(p1),EX(c0110)),
3744 SHR(EX(p0),12) ));
3745 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3746 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3747 return;
3750 if (laneSzBlg2 == 1) {
3751 // 16x8
3752 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3753 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3754 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3756 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3757 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3758 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3760 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3761 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3762 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3763 IRTemp p0 = newTempV128();
3764 IRTemp p1 = newTempV128();
3765 IRTemp p2 = newTempV128();
3766 IRTemp c1000 = newTempV128();
3767 IRTemp c0100 = newTempV128();
3768 IRTemp c0010 = newTempV128();
3769 IRTemp c0001 = newTempV128();
3770 assign(c1000, mkV128(0xF000));
3771 assign(c0100, mkV128(0x0F00));
3772 assign(c0010, mkV128(0x00F0));
3773 assign(c0001, mkV128(0x000F));
3774 // First interleave them at 32x4 granularity,
3775 // generating partial ("p") values.
3776 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3777 // And more shuffling around for the final answer
3778 assign(*i2,
3779 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3780 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3781 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3782 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3784 assign(*i1,
3785 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3786 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3787 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3788 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3790 assign(*i0,
3791 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3792 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3793 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3794 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3796 return;
3799 if (laneSzBlg2 == 0) {
3800 // 8x16. It doesn't seem worth the hassle of first doing a
3801 // 16x8 interleave, so just generate all 24 partial results
3802 // directly :-(
3803 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3804 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3805 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3806 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3808 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3809 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3810 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3811 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3812 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3813 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3814 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3815 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3816 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3818 // e.g. XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
3819 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3821 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3822 IRTemp t_##_tempName = newTempV128(); \
3823 assign(t_##_tempName, \
3824 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3825 ROR(EX(_srcVec2),(_srcShift2)) ) )
3827 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3828 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3830 // The slicing and reassembly are interleaved as much as possible,
3831 // so as to minimise the demand for registers in the back end, which
3832 // was observed to be a problem in testing.
3834 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3835 XXXX(AfCe, AA, 0xf, CC, 0xe);
3836 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3838 XXXX(BeAe, BB, 0xe, AA, 0xe);
3839 XXXX(CdBd, CC, 0xd, BB, 0xd);
3840 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3841 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3843 XXXX(AdCc, AA, 0xd, CC, 0xc);
3844 XXXX(BcAc, BB, 0xc, AA, 0xc);
3845 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3847 XXXX(CbBb, CC, 0xb, BB, 0xb);
3848 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3849 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3850 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3851 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3853 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3854 XXXX(C9B9, CC, 0x9, BB, 0x9);
3855 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3857 XXXX(A9C8, AA, 0x9, CC, 0x8);
3858 XXXX(B8A8, BB, 0x8, AA, 0x8);
3859 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3860 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3862 XXXX(C7B7, CC, 0x7, BB, 0x7);
3863 XXXX(A7C6, AA, 0x7, CC, 0x6);
3864 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3866 XXXX(B6A6, BB, 0x6, AA, 0x6);
3867 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3868 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3869 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3870 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3872 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3873 XXXX(B4A4, BB, 0x4, AA, 0x4);
3874 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3876 XXXX(C3B3, CC, 0x3, BB, 0x3);
3877 XXXX(A3C2, AA, 0x3, CC, 0x2);
3878 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3879 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3881 XXXX(B2A2, BB, 0x2, AA, 0x2);
3882 XXXX(C1B1, CC, 0x1, BB, 0x1);
3883 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3885 XXXX(A1C0, AA, 0x1, CC, 0x0);
3886 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3887 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3888 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3889 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3891 # undef XXXX
3892 return;
3895 /*NOTREACHED*/
3896 vassert(0);
3900 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3901 static
3902 void math_INTERLEAVE4_128(
3903 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3904 UInt laneSzBlg2,
3905 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3907 if (laneSzBlg2 == 3) {
3908 // 64x2
3909 assign(*i0, ILO64x2(EX(u1), EX(u0)));
3910 assign(*i1, ILO64x2(EX(u3), EX(u2)));
3911 assign(*i2, IHI64x2(EX(u1), EX(u0)));
3912 assign(*i3, IHI64x2(EX(u3), EX(u2)));
3913 return;
3915 if (laneSzBlg2 == 2) {
3916 // 32x4
3917 // First, interleave at the 64-bit lane size.
3918 IRTemp p0 = newTempV128();
3919 IRTemp p1 = newTempV128();
3920 IRTemp p2 = newTempV128();
3921 IRTemp p3 = newTempV128();
3922 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3923 // And interleave (cat) at the 32 bit size.
3924 assign(*i0, CEV32x4(EX(p1), EX(p0)));
3925 assign(*i1, COD32x4(EX(p1), EX(p0)));
3926 assign(*i2, CEV32x4(EX(p3), EX(p2)));
3927 assign(*i3, COD32x4(EX(p3), EX(p2)));
3928 return;
3930 if (laneSzBlg2 == 1) {
3931 // 16x8
3932 // First, interleave at the 32-bit lane size.
3933 IRTemp p0 = newTempV128();
3934 IRTemp p1 = newTempV128();
3935 IRTemp p2 = newTempV128();
3936 IRTemp p3 = newTempV128();
3937 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3938 // And rearrange within each vector, to get the right 16 bit lanes.
3939 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3940 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3941 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3942 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3943 return;
3945 if (laneSzBlg2 == 0) {
3946 // 8x16
3947 // First, interleave at the 16-bit lane size.
3948 IRTemp p0 = newTempV128();
3949 IRTemp p1 = newTempV128();
3950 IRTemp p2 = newTempV128();
3951 IRTemp p3 = newTempV128();
3952 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3953 // And rearrange within each vector, to get the right 8 bit lanes.
3954 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3955 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3956 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3957 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3958 return;
3960 /*NOTREACHED*/
3961 vassert(0);
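/* Illustrative trace, not from the original sources, of the 32x4 case
   above: writing u3..u0 as D3..D0, C3..C0, B3..B0, A3..A0, the 64x2
   stage produces
      p0 == B1 B0 A1 A0   p1 == D1 D0 C1 C0
      p2 == B3 B2 A3 A2   p3 == D3 D2 C3 C2
   and the even/odd concatenations then give, for example,
      i0 == CEV32x4(p1,p0) == D0 C0 B0 A0
      i1 == COD32x4(p1,p0) == D1 C1 B1 A1
   which is exactly the ST4 element order A0 B0 C0 D0 A1 B1 C1 D1 ... */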
3965 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3966 static
3967 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3968 UInt laneSzBlg2, IRTemp i0 )
3970 assign(*u0, mkexpr(i0));
3974 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3975 static
3976 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3977 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3979 /* This is pretty easy, since we have primitives directly to
3980 hand. */
3981 if (laneSzBlg2 == 3) {
3982 // 64x2
3983 // i1 == B1 A1, i0 == B0 A0
3984 // u1 == B1 B0, u0 == A1 A0
3985 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3986 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3987 return;
3989 if (laneSzBlg2 == 2) {
3990 // 32x4
3991 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3992 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3993 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3994 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3995 return;
3997 if (laneSzBlg2 == 1) {
3998 // 16x8
3999 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
4000 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
4001 // u1 == B{7..0}, u0 == A{7..0}
4002 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
4003 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
4004 return;
4006 if (laneSzBlg2 == 0) {
4007 // 8x16
4008 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
4009 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
4010 // u1 == B{f..0}, u0 == A{f..0}
4011 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
4012 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
4013 return;
4015 /*NOTREACHED*/
4016 vassert(0);
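/* Conversely (illustrative note, not from the original sources), the
   2-register deinterleave above is the exact inverse of the ST2 scheme
   sketched after math_INTERLEAVE2_128: lane k of u0 is element 2k of
   the interleaved stream i1:i0, and lane k of u1 is element 2k+1. */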
4020 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
4021 static
4022 void math_DEINTERLEAVE3_128(
4023 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4024 UInt laneSzBlg2,
4025 IRTemp i0, IRTemp i1, IRTemp i2 )
4027 if (laneSzBlg2 == 3) {
4028 // 64x2
4029 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
4030 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
4031 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
4032 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
4033 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
4034 return;
4037 if (laneSzBlg2 == 2) {
4038 // 32x4
4039 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
4040 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
4041 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
4042 IRTemp t_a1c0b0a0 = newTempV128();
4043 IRTemp t_a2c1b1a1 = newTempV128();
4044 IRTemp t_a3c2b2a2 = newTempV128();
4045 IRTemp t_a0c3b3a3 = newTempV128();
4046 IRTemp p0 = newTempV128();
4047 IRTemp p1 = newTempV128();
4048 IRTemp p2 = newTempV128();
4049 // Compute some intermediate values.
4050 assign(t_a1c0b0a0, EX(i0));
4051 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
4052 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
4053 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
4054 // First deinterleave into lane-pairs
4055 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
4056 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
4057 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
4058 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
4059 // Then deinterleave at 64x2 granularity.
4060 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
4061 return;
4064 if (laneSzBlg2 == 1) {
4065 // 16x8
4066 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
4067 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
4068 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
4070 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
4071 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
4072 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
4074 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
4075 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
4076 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
4078 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
4079 s0 = s1 = s2 = s3
4080 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
4081 newTempsV128_4(&s0, &s1, &s2, &s3);
4082 newTempsV128_4(&t0, &t1, &t2, &t3);
4083 newTempsV128_4(&p0, &p1, &p2, &c00111111);
4085 // s0 == b2a2 c1b1a1 c0b0a0
4086 // s1 == b4a4 c3b3a3 c2b2a2
4087 // s2 == b6a6 c5b5a5 c4b4a4
4088 // s3 == b0a0 c7b7a7 c6b6a6
4089 assign(s0, EX(i0));
4090 assign(s1, SL(EX(i1),EX(i0),6*2));
4091 assign(s2, SL(EX(i2),EX(i1),4*2));
4092 assign(s3, SL(EX(i0),EX(i2),2*2));
4094 // t0 == 0 0 c1c0 b1b0 a1a0
4095 // t1 == 0 0 c3c2 b3b2 a3a2
4096 // t2 == 0 0 c5c4 b5b4 a5a4
4097 // t3 == 0 0 c7c6 b7b6 a7a6
4098 assign(c00111111, mkV128(0x0FFF));
4099 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4100 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4101 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4102 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4104 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4105 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4106 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4108 // Then deinterleave at 32x4 granularity.
4109 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4110 return;
4113 if (laneSzBlg2 == 0) {
4114 // 8x16. This is the same scheme as for 16x8, with twice the
4115 // number of intermediate values.
4117 // u2 == C{f..0}
4118 // u1 == B{f..0}
4119 // u0 == A{f..0}
4121 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4122 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4123 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4125 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4126 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4127 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4129 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4130 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4131 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4132 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4133 = IRTemp_INVALID;
4134 newTempsV128_4(&s0, &s1, &s2, &s3);
4135 newTempsV128_4(&s4, &s5, &s6, &s7);
4136 newTempsV128_4(&t0, &t1, &t2, &t3);
4137 newTempsV128_4(&t4, &t5, &t6, &t7);
4138 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4140 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4141 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4142 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4143 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4144 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4145 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4146 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4147 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4148 assign(s0, SL(EX(i1),EX(i0), 0));
4149 assign(s1, SL(EX(i1),EX(i0), 6));
4150 assign(s2, SL(EX(i1),EX(i0),12));
4151 assign(s3, SL(EX(i2),EX(i1), 2));
4152 assign(s4, SL(EX(i2),EX(i1), 8));
4153 assign(s5, SL(EX(i2),EX(i1),14));
4154 assign(s6, SL(EX(i0),EX(i2), 4));
4155 assign(s7, SL(EX(i0),EX(i2),10));
4157 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4158 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4159 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4160 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4161 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4162 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4163 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4164 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4165 assign(cMASK, mkV128(0x003F));
4166 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4167 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4168 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4169 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4170 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4171 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4172 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4173 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4175 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4176 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4177 SHL(EX(t3),2), SHR(EX(t2),4) ));
4178 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4180 // Then deinterleave at 16x8 granularity.
4181 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4182 return;
4185 /*NOTREACHED*/
4186 vassert(0);
4190 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4191 static
4192 void math_DEINTERLEAVE4_128(
4193 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4194 UInt laneSzBlg2,
4195 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4197 if (laneSzBlg2 == 3) {
4198 // 64x2
4199 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4200 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4201 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4202 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4203 return;
4205 if (laneSzBlg2 == 2) {
4206 // 32x4
4207 IRTemp p0 = newTempV128();
4208 IRTemp p2 = newTempV128();
4209 IRTemp p1 = newTempV128();
4210 IRTemp p3 = newTempV128();
4211 assign(p0, ILO32x4(EX(i1), EX(i0)));
4212 assign(p1, IHI32x4(EX(i1), EX(i0)));
4213 assign(p2, ILO32x4(EX(i3), EX(i2)));
4214 assign(p3, IHI32x4(EX(i3), EX(i2)));
4215 // And now do what we did for the 64-bit case.
4216 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4217 return;
4219 if (laneSzBlg2 == 1) {
4220 // 16x8
4221 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4222 IRTemp p0 = newTempV128();
4223 IRTemp p1 = newTempV128();
4224 IRTemp p2 = newTempV128();
4225 IRTemp p3 = newTempV128();
4226 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4227 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4228 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4229 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4230 // From here on is like the 32 bit case.
4231 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4232 return;
4234 if (laneSzBlg2 == 0) {
4235 // 8x16
4236 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4237 IRTemp p0 = newTempV128();
4238 IRTemp p1 = newTempV128();
4239 IRTemp p2 = newTempV128();
4240 IRTemp p3 = newTempV128();
4241 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4242 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4243 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4244 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4245 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4246 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4247 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4248 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4249 // From here on is like the 16 bit case.
4250 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4251 return;
4253 /*NOTREACHED*/
4254 vassert(0);
4258 /* Wrappers that use the full-width (de)interleavers to do half-width
4259 (de)interleaving. The scheme is to clone each input lane in the
4260 lower half of each incoming value, do a full width (de)interleave
4261 at the next lane size up, and remove every other lane of the
4262 result. The returned values may have any old junk in the upper
4263 64 bits -- the caller must ignore that. */
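/* Worked example of that scheme, not from the original sources: for an
   ST2 of 8-bit lanes in 64-bit registers, with u0 == .. a1 a0 and
   u1 == .. b1 b0 in their low halves, the "doubler" (InterleaveLO8x16
   of a value with itself) gives du0 == .. a1 a1 a0 a0 and
   du1 == .. b1 b1 b0 b0.  A full-width interleave at 16-bit lanes then
   yields di0 == .. b0 b0 a0 a0, and the "halver" (CatEvenLanes8x16 of
   that with itself) keeps every other byte, giving .. b1 a1 b0 a0 in
   the low 64 bits -- the desired result -- with junk above. */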
4265 /* Helper function -- get doubling and narrowing operations. */
4266 static
4267 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4268 /*OUT*/IROp* halver,
4269 UInt laneSzBlg2 )
4271 switch (laneSzBlg2) {
4272 case 2:
4273 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4274 break;
4275 case 1:
4276 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4277 break;
4278 case 0:
4279 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4280 break;
4281 default:
4282 vassert(0);
4286 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4287 static
4288 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4289 UInt laneSzBlg2, IRTemp u0 )
4291 assign(*i0, mkexpr(u0));
4295 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4296 static
4297 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4298 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4300 if (laneSzBlg2 == 3) {
4301 // 1x64, degenerate case
4302 assign(*i0, EX(u0));
4303 assign(*i1, EX(u1));
4304 return;
4307 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4308 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4309 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4311 IRTemp du0 = newTempV128();
4312 IRTemp du1 = newTempV128();
4313 assign(du0, binop(doubler, EX(u0), EX(u0)));
4314 assign(du1, binop(doubler, EX(u1), EX(u1)));
4315 IRTemp di0 = newTempV128();
4316 IRTemp di1 = newTempV128();
4317 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4318 assign(*i0, binop(halver, EX(di0), EX(di0)));
4319 assign(*i1, binop(halver, EX(di1), EX(di1)));
4323 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4324 static
4325 void math_INTERLEAVE3_64(
4326 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4327 UInt laneSzBlg2,
4328 IRTemp u0, IRTemp u1, IRTemp u2 )
4330 if (laneSzBlg2 == 3) {
4331 // 1x64, degenerate case
4332 assign(*i0, EX(u0));
4333 assign(*i1, EX(u1));
4334 assign(*i2, EX(u2));
4335 return;
4338 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4339 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4340 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4342 IRTemp du0 = newTempV128();
4343 IRTemp du1 = newTempV128();
4344 IRTemp du2 = newTempV128();
4345 assign(du0, binop(doubler, EX(u0), EX(u0)));
4346 assign(du1, binop(doubler, EX(u1), EX(u1)));
4347 assign(du2, binop(doubler, EX(u2), EX(u2)));
4348 IRTemp di0 = newTempV128();
4349 IRTemp di1 = newTempV128();
4350 IRTemp di2 = newTempV128();
4351 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4352 assign(*i0, binop(halver, EX(di0), EX(di0)));
4353 assign(*i1, binop(halver, EX(di1), EX(di1)));
4354 assign(*i2, binop(halver, EX(di2), EX(di2)));
4358 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4359 static
4360 void math_INTERLEAVE4_64(
4361 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4362 UInt laneSzBlg2,
4363 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4365 if (laneSzBlg2 == 3) {
4366 // 1x64, degenerate case
4367 assign(*i0, EX(u0));
4368 assign(*i1, EX(u1));
4369 assign(*i2, EX(u2));
4370 assign(*i3, EX(u3));
4371 return;
4374 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4375 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4376 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4378 IRTemp du0 = newTempV128();
4379 IRTemp du1 = newTempV128();
4380 IRTemp du2 = newTempV128();
4381 IRTemp du3 = newTempV128();
4382 assign(du0, binop(doubler, EX(u0), EX(u0)));
4383 assign(du1, binop(doubler, EX(u1), EX(u1)));
4384 assign(du2, binop(doubler, EX(u2), EX(u2)));
4385 assign(du3, binop(doubler, EX(u3), EX(u3)));
4386 IRTemp di0 = newTempV128();
4387 IRTemp di1 = newTempV128();
4388 IRTemp di2 = newTempV128();
4389 IRTemp di3 = newTempV128();
4390 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4391 laneSzBlg2 + 1, du0, du1, du2, du3);
4392 assign(*i0, binop(halver, EX(di0), EX(di0)));
4393 assign(*i1, binop(halver, EX(di1), EX(di1)));
4394 assign(*i2, binop(halver, EX(di2), EX(di2)));
4395 assign(*i3, binop(halver, EX(di3), EX(di3)));
4399 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4400 static
4401 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4402 UInt laneSzBlg2, IRTemp i0 )
4404 assign(*u0, mkexpr(i0));
4408 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4409 static
4410 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4411 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4413 if (laneSzBlg2 == 3) {
4414 // 1x64, degenerate case
4415 assign(*u0, EX(i0));
4416 assign(*u1, EX(i1));
4417 return;
4420 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4421 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4422 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4424 IRTemp di0 = newTempV128();
4425 IRTemp di1 = newTempV128();
4426 assign(di0, binop(doubler, EX(i0), EX(i0)));
4427 assign(di1, binop(doubler, EX(i1), EX(i1)));
4429 IRTemp du0 = newTempV128();
4430 IRTemp du1 = newTempV128();
4431 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4432 assign(*u0, binop(halver, EX(du0), EX(du0)));
4433 assign(*u1, binop(halver, EX(du1), EX(du1)));
4437 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4438 static
4439 void math_DEINTERLEAVE3_64(
4440 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4441 UInt laneSzBlg2,
4442 IRTemp i0, IRTemp i1, IRTemp i2 )
4444 if (laneSzBlg2 == 3) {
4445 // 1x64, degenerate case
4446 assign(*u0, EX(i0));
4447 assign(*u1, EX(i1));
4448 assign(*u2, EX(i2));
4449 return;
4452 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4453 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4454 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4456 IRTemp di0 = newTempV128();
4457 IRTemp di1 = newTempV128();
4458 IRTemp di2 = newTempV128();
4459 assign(di0, binop(doubler, EX(i0), EX(i0)));
4460 assign(di1, binop(doubler, EX(i1), EX(i1)));
4461 assign(di2, binop(doubler, EX(i2), EX(i2)));
4462 IRTemp du0 = newTempV128();
4463 IRTemp du1 = newTempV128();
4464 IRTemp du2 = newTempV128();
4465 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4466 assign(*u0, binop(halver, EX(du0), EX(du0)));
4467 assign(*u1, binop(halver, EX(du1), EX(du1)));
4468 assign(*u2, binop(halver, EX(du2), EX(du2)));
4472 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4473 static
4474 void math_DEINTERLEAVE4_64(
4475 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4476 UInt laneSzBlg2,
4477 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4479 if (laneSzBlg2 == 3) {
4480 // 1x64, degenerate case
4481 assign(*u0, EX(i0));
4482 assign(*u1, EX(i1));
4483 assign(*u2, EX(i2));
4484 assign(*u3, EX(i3));
4485 return;
4488 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4489 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4490 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4492 IRTemp di0 = newTempV128();
4493 IRTemp di1 = newTempV128();
4494 IRTemp di2 = newTempV128();
4495 IRTemp di3 = newTempV128();
4496 assign(di0, binop(doubler, EX(i0), EX(i0)));
4497 assign(di1, binop(doubler, EX(i1), EX(i1)));
4498 assign(di2, binop(doubler, EX(i2), EX(i2)));
4499 assign(di3, binop(doubler, EX(i3), EX(i3)));
4500 IRTemp du0 = newTempV128();
4501 IRTemp du1 = newTempV128();
4502 IRTemp du2 = newTempV128();
4503 IRTemp du3 = newTempV128();
4504 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4505 laneSzBlg2 + 1, di0, di1, di2, di3);
4506 assign(*u0, binop(halver, EX(du0), EX(du0)));
4507 assign(*u1, binop(halver, EX(du1), EX(du1)));
4508 assign(*u2, binop(halver, EX(du2), EX(du2)));
4509 assign(*u3, binop(halver, EX(du3), EX(du3)));
4513 #undef EX
4514 #undef SL
4515 #undef ROR
4516 #undef ROL
4517 #undef SHR
4518 #undef SHL
4519 #undef ILO64x2
4520 #undef IHI64x2
4521 #undef ILO32x4
4522 #undef IHI32x4
4523 #undef ILO16x8
4524 #undef IHI16x8
4525 #undef ILO8x16
4526 #undef IHI8x16
4527 #undef CEV32x4
4528 #undef COD32x4
4529 #undef COD16x8
4530 #undef COD8x16
4531 #undef CEV8x16
4532 #undef AND
4533 #undef OR2
4534 #undef OR3
4535 #undef OR4
4538 /*------------------------------------------------------------*/
4539 /*--- Load and Store instructions ---*/
4540 /*------------------------------------------------------------*/
4542 /* Generate the EA for a "reg + reg" style amode. This is done from
4543 parts of the insn, but for the sake of sanity checking it takes the whole
4544 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4545 and S=insn[12]:
4547 The possible forms, along with their opt:S values, are:
4548 011:0 Xn|SP + Xm
4549 111:0 Xn|SP + Xm
4550 011:1 Xn|SP + Xm * transfer_szB
4551 111:1 Xn|SP + Xm * transfer_szB
4552 010:0 Xn|SP + 32Uto64(Wm)
4553 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4554 110:0 Xn|SP + 32Sto64(Wm)
4555 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4557 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4558 the transfer size is insn[23,31,30]. For integer loads/stores,
4559 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4561 If the decoding fails, it returns IRTemp_INVALID.
4563 isInt is True iff this decoding is for transfers to/from integer
4564 registers. If False it is for transfers to/from vector registers.
4566 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4568 UInt optS = SLICE_UInt(insn, 15, 12);
4569 UInt mm = SLICE_UInt(insn, 20, 16);
4570 UInt nn = SLICE_UInt(insn, 9, 5);
4571 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4572 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4574 buf[0] = 0;
4576 /* Sanity checks, that this really is a load/store insn. */
4577 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4578 goto fail;
4580 if (isInt
4581 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4582 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4583 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4584 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4585 goto fail;
4587 if (!isInt
4588 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4589 goto fail;
4591 /* Throw out non-verified but possibly valid cases. */
4592 switch (szLg2) {
4593 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4594 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4595 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4596 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4597 case BITS3(1,0,0): // can only ever be valid for the vector case
4598 if (isInt) goto fail; else break;
4599 case BITS3(1,0,1): // these sizes are never valid
4600 case BITS3(1,1,0):
4601 case BITS3(1,1,1): goto fail;
4603 default: vassert(0);
4606 IRExpr* rhs = NULL;
4607 switch (optS) {
4608 case BITS4(1,1,1,0): goto fail; //ATC
4609 case BITS4(0,1,1,0):
4610 rhs = getIReg64orZR(mm);
4611 vex_sprintf(buf, "[%s, %s]",
4612 nameIReg64orZR(nn), nameIReg64orZR(mm));
4613 break;
4614 case BITS4(1,1,1,1): goto fail; //ATC
4615 case BITS4(0,1,1,1):
4616 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4617 vex_sprintf(buf, "[%s, %s lsl %u]",
4618 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4619 break;
4620 case BITS4(0,1,0,0):
4621 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4622 vex_sprintf(buf, "[%s, %s uxtx]",
4623 nameIReg64orZR(nn), nameIReg32orZR(mm));
4624 break;
4625 case BITS4(0,1,0,1):
4626 rhs = binop(Iop_Shl64,
4627 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4628 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4629 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4630 break;
4631 case BITS4(1,1,0,0):
4632 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4633 vex_sprintf(buf, "[%s, %s sxtx]",
4634 nameIReg64orZR(nn), nameIReg32orZR(mm));
4635 break;
4636 case BITS4(1,1,0,1):
4637 rhs = binop(Iop_Shl64,
4638 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4639 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4640 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4641 break;
4642 default:
4643 /* The rest appear to be genuinely invalid */
4644 goto fail;
4647 vassert(rhs);
4648 IRTemp res = newTemp(Ity_I64);
4649 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4650 return res;
4652 fail:
4653 if (0 /*really, sigill_diag, but that causes too much plumbing*/) {
4654 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4656 return IRTemp_INVALID;
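/* For instance (illustrative, not from the original sources), for
   "ldr x0, [x1, x2, lsl #3]" the optS == 0111 case applies, and the
   returned EA is Add64(X1, Shl64(X2, 3)), with dis_buf reading
   "[x1, x2 lsl 3]". */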
4660 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4661 bits of DATAE :: Ity_I64. */
4662 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4664 IRExpr* addrE = mkexpr(addr);
4665 switch (szB) {
4666 case 8:
4667 storeLE(addrE, dataE);
4668 break;
4669 case 4:
4670 storeLE(addrE, unop(Iop_64to32, dataE));
4671 break;
4672 case 2:
4673 storeLE(addrE, unop(Iop_64to16, dataE));
4674 break;
4675 case 1:
4676 storeLE(addrE, unop(Iop_64to8, dataE));
4677 break;
4678 default:
4679 vassert(0);
4684 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4685 placing the result in an Ity_I64 temporary. */
4686 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4688 IRTemp res = newTemp(Ity_I64);
4689 IRExpr* addrE = mkexpr(addr);
4690 switch (szB) {
4691 case 8:
4692 assign(res, loadLE(Ity_I64,addrE));
4693 break;
4694 case 4:
4695 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4696 break;
4697 case 2:
4698 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4699 break;
4700 case 1:
4701 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4702 break;
4703 default:
4704 vassert(0);
4706 return res;
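/* For example (illustrative, not from the original sources), a 2-byte
   widening load is generated as unop(Iop_16Uto64, loadLE(Ity_I16,addr)),
   whereas an 8-byte load is just loadLE(Ity_I64,addr), with no widening
   needed. */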
4710 /* Generate a "standard 7" name, from bitQ and size. But also
4711 allow ".1d" since that's occasionally useful. */
4712 static
4713 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4715 vassert(bitQ <= 1 && size <= 3);
4716 const HChar* nms[8]
4717 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4718 UInt ix = (bitQ << 2) | size;
4719 vassert(ix < 8);
4720 return nms[ix];
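/* Examples (illustrative): bitQ=0,size=0 gives "8b"; bitQ=1,size=2
   gives "4s"; and bitQ=0,size=3 gives the otherwise-unusual "1d". */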
4724 static
4725 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
4726 const VexAbiInfo* abiinfo, Bool sigill_diag)
4728 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4730 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4731 /* uimm12 is scaled by the transfer size
4733 31 29 26 21 9 4
4734 | | | | | |
4735 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4736 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4738 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4739 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4741 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4742 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4744 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4745 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4747 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4748 UInt szLg2 = INSN(31,30);
4749 UInt szB = 1 << szLg2;
4750 Bool isLD = INSN(22,22) == 1;
4751 UInt offs = INSN(21,10) * szB;
4752 UInt nn = INSN(9,5);
4753 UInt tt = INSN(4,0);
4754 IRTemp ta = newTemp(Ity_I64);
4755 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4756 if (nn == 31) { /* FIXME generate stack alignment check */ }
4757 vassert(szLg2 < 4);
4758 if (isLD) {
4759 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4760 } else {
4761 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4763 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4764 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4765 DIP("%s %s, [%s, #%u]\n",
4766 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4767 nameIReg64orSP(nn), offs);
4768 return True;
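/* Worked example, not from the original sources: for
   "ldr x2, [x3, #16]" the fields are szLg2 == 3 (so szB == 8) and
   imm12 == 2, hence offs == 16; the transfer address is X3 + 16 and
   the full 64-bit value is loaded into X2. */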
4771 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4773 31 29 26 20 11 9 4
4774 | | | | | | |
4775 (at-Rn-then-Rn=EA)
4776 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4777 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4779 (at-EA-then-Rn=EA)
4780 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4781 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4783 (at-EA)
4784 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4785 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4787 simm9 is unscaled.
4789 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4790 load case this is because it would create two competing values for
4791 Rt. In the store case the reason is unclear, but the spec
4792 disallows it anyway.
4794 Stores are narrowing, loads are unsigned widening. sz encodes
4795 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4797 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4798 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4799 UInt szLg2 = INSN(31,30);
4800 UInt szB = 1 << szLg2;
4801 Bool isLoad = INSN(22,22) == 1;
4802 UInt imm9 = INSN(20,12);
4803 UInt nn = INSN(9,5);
4804 UInt tt = INSN(4,0);
4805 Bool wBack = INSN(10,10) == 1;
4806 UInt how = INSN(11,10);
4807 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4808 /* undecodable; fall through */
4809 } else {
4810 if (nn == 31) { /* FIXME generate stack alignment check */ }
4812 // Compute the transfer address TA and the writeback address WA.
4813 IRTemp tRN = newTemp(Ity_I64);
4814 assign(tRN, getIReg64orSP(nn));
4815 IRTemp tEA = newTemp(Ity_I64);
4816 Long simm9 = (Long)sx_to_64(imm9, 9);
4817 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4819 IRTemp tTA = newTemp(Ity_I64);
4820 IRTemp tWA = newTemp(Ity_I64);
4821 switch (how) {
4822 case BITS2(0,1):
4823 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4824 case BITS2(1,1):
4825 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4826 case BITS2(0,0):
4827 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4828 default:
4829 vassert(0); /* NOTREACHED */
4832 /* Normally rN would be updated after the transfer. However, in
4833 the special cases typified by
4834 str x30, [sp,#-16]!
4835 str w1, [sp,#-32]!
4836 it is necessary to update SP before the transfer, (1)
4837 because Memcheck will otherwise complain about a write
4838 below the stack pointer, and (2) because the segfault
4839 stack extension mechanism will otherwise extend the stack
4840 only down to SP before the instruction, which might not be
4841 far enough, if the -16/-32 offset takes the actual access
4842 address onto the next page.
4844 Bool earlyWBack
4845 = wBack && simm9 < 0 && (szB == 8 || szB == 4)
4846 && how == BITS2(1,1) && nn == 31 && !isLoad;
4848 if (wBack && earlyWBack)
4849 putIReg64orSP(nn, mkexpr(tEA));
4851 if (isLoad) {
4852 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4853 } else {
4854 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4857 if (wBack && !earlyWBack)
4858 putIReg64orSP(nn, mkexpr(tEA));
4860 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4861 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4862 const HChar* fmt_str = NULL;
4863 switch (how) {
4864 case BITS2(0,1):
4865 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4866 break;
4867 case BITS2(1,1):
4868 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4869 break;
4870 case BITS2(0,0):
4871 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4872 break;
4873 default:
4874 vassert(0);
4876 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4877 nameIRegOrZR(szB == 8, tt),
4878 nameIReg64orSP(nn), simm9);
4879 return True;
4883 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4884 /* L==1 => mm==LD
4885 L==0 => mm==ST
4886 x==0 => 32 bit transfers, and zero extended loads
4887 x==1 => 64 bit transfers
4888 simm7 is scaled by the (single-register) transfer size
4890 (at-Rn-then-Rn=EA)
4891 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4893 (at-EA-then-Rn=EA)
4894 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4896 (at-EA)
4897 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4899 UInt insn_30_23 = INSN(30,23);
4900 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4901 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4902 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4903 UInt bL = INSN(22,22);
4904 UInt bX = INSN(31,31);
4905 UInt bWBack = INSN(23,23);
4906 UInt rT1 = INSN(4,0);
4907 UInt rN = INSN(9,5);
4908 UInt rT2 = INSN(14,10);
4909 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4910 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4911 || (bL && rT1 == rT2)) {
4912 /* undecodable; fall through */
4913 } else {
4914 if (rN == 31) { /* FIXME generate stack alignment check */ }
4916 // Compute the transfer address TA and the writeback address WA.
4917 IRTemp tRN = newTemp(Ity_I64);
4918 assign(tRN, getIReg64orSP(rN));
4919 IRTemp tEA = newTemp(Ity_I64);
4920 simm7 = (bX ? 8 : 4) * simm7;
4921 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4923 IRTemp tTA = newTemp(Ity_I64);
4924 IRTemp tWA = newTemp(Ity_I64);
4925 switch (INSN(24,23)) {
4926 case BITS2(0,1):
4927 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4928 case BITS2(1,1):
4929 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4930 case BITS2(1,0):
4931 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4932 default:
4933 vassert(0); /* NOTREACHED */
4936 /* Normally rN would be updated after the transfer. However, in
4937 the special case typified by
4938 stp x29, x30, [sp,#-112]!
4939 it is necessary to update SP before the transfer, (1)
4940 because Memcheck will otherwise complain about a write
4941 below the stack pointer, and (2) because the segfault
4942 stack extension mechanism will otherwise extend the stack
4943 only down to SP before the instruction, which might not be
4944 far enough, if the -112 offset takes the actual access
4945 address onto the next page.
4947 Bool earlyWBack
4948 = bWBack && simm7 < 0
4949 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4951 if (bWBack && earlyWBack)
4952 putIReg64orSP(rN, mkexpr(tEA));
4954 /**/ if (bL == 1 && bX == 1) {
4955 // 64 bit load
4956 putIReg64orZR(rT1, loadLE(Ity_I64,
4957 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4958 putIReg64orZR(rT2, loadLE(Ity_I64,
4959 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4960 } else if (bL == 1 && bX == 0) {
4961 // 32 bit load
4962 putIReg32orZR(rT1, loadLE(Ity_I32,
4963 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4964 putIReg32orZR(rT2, loadLE(Ity_I32,
4965 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4966 } else if (bL == 0 && bX == 1) {
4967 // 64 bit store
4968 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4969 getIReg64orZR(rT1));
4970 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4971 getIReg64orZR(rT2));
4972 } else {
4973 vassert(bL == 0 && bX == 0);
4974 // 32 bit store
4975 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4976 getIReg32orZR(rT1));
4977 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4978 getIReg32orZR(rT2));
4981 if (bWBack && !earlyWBack)
4982 putIReg64orSP(rN, mkexpr(tEA));
4984 const HChar* fmt_str = NULL;
4985 switch (INSN(24,23)) {
4986 case BITS2(0,1):
4987 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4988 break;
4989 case BITS2(1,1):
4990 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4991 break;
4992 case BITS2(1,0):
4993 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4994 break;
4995 default:
4996 vassert(0);
4998 DIP(fmt_str, bL == 0 ? "st" : "ld",
4999 nameIRegOrZR(bX == 1, rT1),
5000 nameIRegOrZR(bX == 1, rT2),
5001 nameIReg64orSP(rN), simm7);
5002 return True;
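/* Worked example, not from the original sources: for
   "stp x29, x30, [sp, #-112]!" the 7-bit imm7 field is 0b1110010,
   which sign-extends to -14; since bX == 1 it is scaled by 8, giving
   simm7 == -112.  Because rN == 31 (SP), the access is pre-indexed,
   the transfer is a store and the offset is negative, the earlyWBack
   rule above applies and SP is updated before the two stores are
   generated. */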
5006 /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
5007 /* Does 32 bit transfers which are sign extended to 64 bits.
5008 simm7 is scaled by the (single-register) transfer size
5010 (at-Rn-then-Rn=EA)
5011 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
5013 (at-EA-then-Rn=EA)
5014 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
5016 (at-EA)
5017 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
5019 UInt insn_31_22 = INSN(31,22);
5020 if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
5021 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5022 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5023 UInt bWBack = INSN(23,23);
5024 UInt rT1 = INSN(4,0);
5025 UInt rN = INSN(9,5);
5026 UInt rT2 = INSN(14,10);
5027 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5028 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5029 || (rT1 == rT2)) {
5030 /* undecodable; fall through */
5031 } else {
5032 if (rN == 31) { /* FIXME generate stack alignment check */ }
5034 // Compute the transfer address TA and the writeback address WA.
5035 IRTemp tRN = newTemp(Ity_I64);
5036 assign(tRN, getIReg64orSP(rN));
5037 IRTemp tEA = newTemp(Ity_I64);
5038 simm7 = 4 * simm7;
5039 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5041 IRTemp tTA = newTemp(Ity_I64);
5042 IRTemp tWA = newTemp(Ity_I64);
5043 switch (INSN(24,23)) {
5044 case BITS2(0,1):
5045 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5046 case BITS2(1,1):
5047 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5048 case BITS2(1,0):
5049 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5050 default:
5051 vassert(0); /* NOTREACHED */
5054 // 32 bit load, sign extended to 64 bits
5055 putIReg64orZR(rT1, unop(Iop_32Sto64,
5056 loadLE(Ity_I32, binop(Iop_Add64,
5057 mkexpr(tTA),
5058 mkU64(0)))));
5059 putIReg64orZR(rT2, unop(Iop_32Sto64,
5060 loadLE(Ity_I32, binop(Iop_Add64,
5061 mkexpr(tTA),
5062 mkU64(4)))));
5063 if (bWBack)
5064 putIReg64orSP(rN, mkexpr(tEA));
5066 const HChar* fmt_str = NULL;
5067 switch (INSN(24,23)) {
5068 case BITS2(0,1):
5069 fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5070 break;
5071 case BITS2(1,1):
5072 fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5073 break;
5074 case BITS2(1,0):
5075 fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
5076 break;
5077 default:
5078 vassert(0);
5080 DIP(fmt_str, nameIReg64orZR(rT1),
5081 nameIReg64orZR(rT2),
5082 nameIReg64orSP(rN), simm7);
5083 return True;
5087 /* ---------------- LDR (literal, int reg) ---------------- */
5088 /* 31 29 23 4
5089 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5090 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5091 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5092 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5093 Just handles the first two cases for now.
5095 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5096 UInt imm19 = INSN(23,5);
5097 UInt rT = INSN(4,0);
5098 UInt bX = INSN(30,30);
5099 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5100 if (bX) {
5101 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5102 } else {
5103 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5105 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5106 return True;
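/* Worked example, not from the original sources: with imm19 == 4 the
   byte offset is sx_to_64(4 << 2, 21) == 16, so the load targets
   guest_PC_curr_instr + 16; negative offsets arise, via the sign
   extension, when bit 18 of imm19 is set. */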
5109 /* -------------- {LD,ST}R (integer register) --------------- */
5110 /* 31 29 20 15 12 11 9 4
5111 | | | | | | | |
5112 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5113 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5114 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5115 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5117 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5118 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5119 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5120 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5122 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5123 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5124 HChar dis_buf[64];
5125 UInt szLg2 = INSN(31,30);
5126 Bool isLD = INSN(22,22) == 1;
5127 UInt tt = INSN(4,0);
5128 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5129 if (ea != IRTemp_INVALID) {
5130 switch (szLg2) {
5131 case 3: /* 64 bit */
5132 if (isLD) {
5133 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5134 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5135 } else {
5136 storeLE(mkexpr(ea), getIReg64orZR(tt));
5137 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5139 break;
5140 case 2: /* 32 bit */
5141 if (isLD) {
5142 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5143 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5144 } else {
5145 storeLE(mkexpr(ea), getIReg32orZR(tt));
5146 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5148 break;
5149 case 1: /* 16 bit */
5150 if (isLD) {
5151 putIReg64orZR(tt, unop(Iop_16Uto64,
5152 loadLE(Ity_I16, mkexpr(ea))));
5153 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5154 } else {
5155 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5156 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5158 break;
5159 case 0: /* 8 bit */
5160 if (isLD) {
5161 putIReg64orZR(tt, unop(Iop_8Uto64,
5162 loadLE(Ity_I8, mkexpr(ea))));
5163 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5164 } else {
5165 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5166 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5168 break;
5169 default:
5170 vassert(0);
5172 return True;
5176 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5177 /* 31 29 26 23 21 9 4
5178 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5179 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5180 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5181 where
5182 Rt is Wt when x==1, Xt when x==0
5184 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5185 /* Further checks on bits 31:30 and 22 */
5186 Bool valid = False;
5187 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5188 case BITS3(1,0,0):
5189 case BITS3(0,1,0): case BITS3(0,1,1):
5190 case BITS3(0,0,0): case BITS3(0,0,1):
5191 valid = True;
5192 break;
5194 if (valid) {
5195 UInt szLg2 = INSN(31,30);
5196 UInt bitX = INSN(22,22);
5197 UInt imm12 = INSN(21,10);
5198 UInt nn = INSN(9,5);
5199 UInt tt = INSN(4,0);
5200 UInt szB = 1 << szLg2;
5201 IRExpr* ea = binop(Iop_Add64,
5202 getIReg64orSP(nn), mkU64(imm12 * szB));
5203 switch (szB) {
5204 case 4:
5205 vassert(bitX == 0);
5206 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5207 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5208 nameIReg64orSP(nn), imm12 * szB);
5209 break;
5210 case 2:
5211 if (bitX == 1) {
5212 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5213 } else {
5214 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5216 DIP("ldrsh %s, [%s, #%u]\n",
5217 nameIRegOrZR(bitX == 0, tt),
5218 nameIReg64orSP(nn), imm12 * szB);
5219 break;
5220 case 1:
5221 if (bitX == 1) {
5222 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5223 } else {
5224 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5226 DIP("ldrsb %s, [%s, #%u]\n",
5227 nameIRegOrZR(bitX == 0, tt),
5228 nameIReg64orSP(nn), imm12 * szB);
5229 break;
5230 default:
5231 vassert(0);
5233 return True;
5235 /* else fall through */
5238 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5239 /* (at-Rn-then-Rn=EA)
5240 31 29 23 21 20 11 9 4
5241 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5242 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5243 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5245 (at-EA-then-Rn=EA)
5246 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5247 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5248 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5249 where
5250 Rt is Wt when x==1, Xt when x==0
5251 transfer-at-Rn when [11]==0, at EA when [11]==1
5253 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5254 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5255 /* Further checks on bits 31:30 and 22 */
5256 Bool valid = False;
5257 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5258 case BITS3(1,0,0): // LDRSW Xt
5259 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5260 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5261 valid = True;
5262 break;
5264 if (valid) {
5265 UInt szLg2 = INSN(31,30);
5266 UInt imm9 = INSN(20,12);
5267 Bool atRN = INSN(11,11) == 0;
5268 UInt nn = INSN(9,5);
5269 UInt tt = INSN(4,0);
5270 IRTemp tRN = newTemp(Ity_I64);
5271 IRTemp tEA = newTemp(Ity_I64);
5272 IRTemp tTA = IRTemp_INVALID;
5273 ULong simm9 = sx_to_64(imm9, 9);
5274 Bool is64 = INSN(22,22) == 0;
5275 assign(tRN, getIReg64orSP(nn));
5276 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5277 tTA = atRN ? tRN : tEA;
5278 HChar ch = '?';
5279 /* There are 5 cases:
5280 byte load, SX to 64
5281 byte load, SX to 32, ZX to 64
5282 halfword load, SX to 64
5283 halfword load, SX to 32, ZX to 64
5284 word load, SX to 64
5285 The ifs below handle them in the listed order.
5287 if (szLg2 == 0) {
5288 ch = 'b';
5289 if (is64) {
5290 putIReg64orZR(tt, unop(Iop_8Sto64,
5291 loadLE(Ity_I8, mkexpr(tTA))));
5292 } else {
5293 putIReg32orZR(tt, unop(Iop_8Sto32,
5294 loadLE(Ity_I8, mkexpr(tTA))));
5297 else if (szLg2 == 1) {
5298 ch = 'h';
5299 if (is64) {
5300 putIReg64orZR(tt, unop(Iop_16Sto64,
5301 loadLE(Ity_I16, mkexpr(tTA))));
5302 } else {
5303 putIReg32orZR(tt, unop(Iop_16Sto32,
5304 loadLE(Ity_I16, mkexpr(tTA))));
5307 else if (szLg2 == 2 && is64) {
5308 ch = 'w';
5309 putIReg64orZR(tt, unop(Iop_32Sto64,
5310 loadLE(Ity_I32, mkexpr(tTA))));
5312 else {
5313 vassert(0);
5315 putIReg64orSP(nn, mkexpr(tEA));
5316 DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!\n",
5317 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5318 return True;
5320 /* else fall through */
5323 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5324 /* 31 29 23 21 20 11 9 4
5325 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5326 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5327 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5328 where
5329 Rt is Wt when x==1, Xt when x==0
5331 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5332 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5333 /* Further checks on bits 31:30 and 22 */
5334 Bool valid = False;
5335 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5336 case BITS3(1,0,0): // LDURSW Xt
5337 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5338 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5339 valid = True;
5340 break;
5342 if (valid) {
5343 UInt szLg2 = INSN(31,30);
5344 UInt imm9 = INSN(20,12);
5345 UInt nn = INSN(9,5);
5346 UInt tt = INSN(4,0);
5347 IRTemp tRN = newTemp(Ity_I64);
5348 IRTemp tEA = newTemp(Ity_I64);
5349 ULong simm9 = sx_to_64(imm9, 9);
5350 Bool is64 = INSN(22,22) == 0;
5351 assign(tRN, getIReg64orSP(nn));
5352 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5353 HChar ch = '?';
5354 /* There are 5 cases:
5355 byte load, SX to 64
5356 byte load, SX to 32, ZX to 64
5357 halfword load, SX to 64
5358 halfword load, SX to 32, ZX to 64
5359 word load, SX to 64
5360 The ifs below handle them in the listed order.
5362 if (szLg2 == 0) {
5363 ch = 'b';
5364 if (is64) {
5365 putIReg64orZR(tt, unop(Iop_8Sto64,
5366 loadLE(Ity_I8, mkexpr(tEA))));
5367 } else {
5368 putIReg32orZR(tt, unop(Iop_8Sto32,
5369 loadLE(Ity_I8, mkexpr(tEA))));
5372 else if (szLg2 == 1) {
5373 ch = 'h';
5374 if (is64) {
5375 putIReg64orZR(tt, unop(Iop_16Sto64,
5376 loadLE(Ity_I16, mkexpr(tEA))));
5377 } else {
5378 putIReg32orZR(tt, unop(Iop_16Sto32,
5379 loadLE(Ity_I16, mkexpr(tEA))));
5382 else if (szLg2 == 2 && is64) {
5383 ch = 'w';
5384 putIReg64orZR(tt, unop(Iop_32Sto64,
5385 loadLE(Ity_I32, mkexpr(tEA))));
5387 else {
5388 vassert(0);
5390 DIP("ldurs%c %s, [%s, #%lld]\n",
5391 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5392 return True;
5394 /* else fall through */
5397 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5398 /* L==1 => mm==LD
5399 L==0 => mm==ST
5400 sz==00 => 32 bit (S) transfers
5401 sz==01 => 64 bit (D) transfers
5402 sz==10 => 128 bit (Q) transfers
5403 sz==11 isn't allowed
5404 simm7 is scaled by the (single-register) transfer size
5406 31 29 26 22 21 14 9 4
5408 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5409 (at-EA, with nontemporal hint)
5411 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5412 (at-Rn-then-Rn=EA)
5414 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5415 (at-EA)
5417 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5418 (at-EA-then-Rn=EA)
5420 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5421 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5422 Bool isLD = INSN(22,22) == 1;
5423 Bool wBack = INSN(23,23) == 1;
5424 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5425 UInt tt2 = INSN(14,10);
5426 UInt nn = INSN(9,5);
5427 UInt tt1 = INSN(4,0);
5428 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5429 /* undecodable; fall through */
5430 } else {
5431 if (nn == 31) { /* FIXME generate stack alignment check */ }
5433 // Compute the transfer address TA and the writeback address WA.
5434 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5435 IRTemp tRN = newTemp(Ity_I64);
5436 assign(tRN, getIReg64orSP(nn));
5437 IRTemp tEA = newTemp(Ity_I64);
5438 simm7 = szB * simm7;
5439 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5441 IRTemp tTA = newTemp(Ity_I64);
5442 IRTemp tWA = newTemp(Ity_I64);
5443 switch (INSN(24,23)) {
5444 case BITS2(0,1):
5445 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5446 case BITS2(1,1):
5447 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5448 case BITS2(1,0):
5449 case BITS2(0,0):
5450 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5451 default:
5452 vassert(0); /* NOTREACHED */
5455 IRType ty = Ity_INVALID;
5456 switch (szB) {
5457 case 4: ty = Ity_F32; break;
5458 case 8: ty = Ity_F64; break;
5459 case 16: ty = Ity_V128; break;
5460 default: vassert(0);
5463 /* Normally rN would be updated after the transfer. However, in
5464 the special cases typified by
5465 stp q0, q1, [sp,#-512]!
5466 stp d0, d1, [sp,#-512]!
5467 stp s0, s1, [sp,#-512]!
5468 it is necessary to update SP before the transfer, (1)
5469 because Memcheck will otherwise complain about a write
5470 below the stack pointer, and (2) because the segfault
5471 stack extension mechanism will otherwise extend the stack
5472 only down to SP before the instruction, which might not be
5473 far enough, if the -512 offset takes the actual access
5474 address to the next page.
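/* Concrete illustration (addresses invented for the example): with
   SP == 0x4000, "stp q0, q1, [sp, #-512]!" stores to 0x3E00 and
   0x3E10, both below the current SP and possibly on an as-yet
   unmapped stack page.  Writing SP := 0x3E00 before the two stores
   avoids both of the problems described above. */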
5476 Bool earlyWBack
5477 = wBack && simm7 < 0
5478 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5480 if (wBack && earlyWBack)
5481 putIReg64orSP(nn, mkexpr(tEA));
5483 if (isLD) {
5484 if (szB < 16) {
5485 putQReg128(tt1, mkV128(0x0000));
5487 putQRegLO(tt1,
5488 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5489 if (szB < 16) {
5490 putQReg128(tt2, mkV128(0x0000));
5492 putQRegLO(tt2,
5493 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5494 } else {
5495 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5496 getQRegLO(tt1, ty));
5497 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5498 getQRegLO(tt2, ty));
5501 if (wBack && !earlyWBack)
5502 putIReg64orSP(nn, mkexpr(tEA));
5504 const HChar* fmt_str = NULL;
5505 switch (INSN(24,23)) {
5506 case BITS2(0,1):
5507 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5508 break;
5509 case BITS2(1,1):
5510 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5511 break;
5512 case BITS2(1,0):
5513 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5514 break;
5515 case BITS2(0,0):
5516 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5517 break;
5518 default:
5519 vassert(0);
5521 DIP(fmt_str, isLD ? "ld" : "st",
5522 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5523 nameIReg64orSP(nn), simm7);
5524 return True;
5528 /* -------------- {LD,ST}R (vector register) --------------- */
5529 /* 31 29 23 20 15 12 11 9 4
5530 | | | | | | | | |
5531 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5532 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5533 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5534 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5535 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5537 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5538 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5539 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5540 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5541 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5543 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5544 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5545 HChar dis_buf[64];
5546 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5547 Bool isLD = INSN(22,22) == 1;
5548 UInt tt = INSN(4,0);
5549 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5550 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5551 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5552 switch (szLg2) {
5553 case 0: /* 8 bit */
5554 if (isLD) {
5555 putQReg128(tt, mkV128(0x0000));
5556 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5557 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5558 } else {
5559 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5560 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5562 break;
5563 case 1: /* 16 bit */
5564 if (isLD) {
5565 putQReg128(tt, mkV128(0x0000));
5566 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5567 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5568 } else {
5569 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5570 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5572 break;
5573 case 2: /* 32 bit */
5574 if (isLD) {
5575 putQReg128(tt, mkV128(0x0000));
5576 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5577 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5578 } else {
5579 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5580 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5582 break;
5583 case 3: /* 64 bit */
5584 if (isLD) {
5585 putQReg128(tt, mkV128(0x0000));
5586 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5587 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5588 } else {
5589 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5590 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5592 break;
5593 case 4: /* 128 bit */
5594 if (isLD) {
5595 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5596 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5597 } else {
5598 storeLE(mkexpr(ea), getQReg128(tt));
5599 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5601 break;
5602 default:
5603 vassert(0);
5605 return True;
5607 after_LDR_STR_vector_register:
5609 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5610 /* 31 29 22 20 15 12 11 9 4
5611 | | | | | | | | |
5612 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5614 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5615 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5617 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5618 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5620 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5621 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5622 HChar dis_buf[64];
5623 UInt szLg2 = INSN(31,30);
5624 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5625 UInt tt = INSN(4,0);
5626 if (szLg2 == 3) goto after_LDRS_integer_register;
5627 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5628 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5629 /* Enumerate the 5 variants explicitly. */
5630 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5631 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5632 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5633 return True;
5635 else
5636 if (szLg2 == 1/*16 bit*/) {
5637 if (sxTo64) {
5638 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5639 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5640 } else {
5641 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5642 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5644 return True;
5646 else
5647 if (szLg2 == 0/*8 bit*/) {
5648 if (sxTo64) {
5649 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5650 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5651 } else {
5652 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5653 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5655 return True;
5657 /* else it's an invalid combination */
5659 after_LDRS_integer_register:
5661 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5662 /* This is the Unsigned offset variant only. The Post-Index and
5663 Pre-Index variants are below.
5665 31 29 23 21 9 4
5666 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5667 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5668 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5669 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5670 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5672 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5673 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5674 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5675 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5676 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
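/* Worked decode example (illustrative only): "ldr q3, [sp, #256]"
   has sz=00 and bit 23 set (so szLg2 = 4), imm12 = 16, n = 31, t = 3;
   the effective address is SP + 16*16 = SP + 256 and the transfer
   type is Ity_V128. */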
5678 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5679 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5680 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5681 Bool isLD = INSN(22,22) == 1;
5682 UInt pimm12 = INSN(21,10) << szLg2;
5683 UInt nn = INSN(9,5);
5684 UInt tt = INSN(4,0);
5685 IRTemp tEA = newTemp(Ity_I64);
5686 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5687 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5688 if (isLD) {
5689 if (szLg2 < 4) {
5690 putQReg128(tt, mkV128(0x0000));
5692 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5693 } else {
5694 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5696 DIP("%s %s, [%s, #%u]\n",
5697 isLD ? "ldr" : "str",
5698 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5699 return True;
5702 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5703 /* These are the Post-Index and Pre-Index variants.
5705 31 29 23 20 11 9 4
5706 (at-Rn-then-Rn=EA)
5707 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5708 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5709 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5710 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5711 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5713 (at-EA-then-Rn=EA)
5714 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5715 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5716 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5717 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5718 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5720 Stores are the same except with bit 22 set to 0.
5722 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5723 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5724 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5725 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5726 Bool isLD = INSN(22,22) == 1;
5727 UInt imm9 = INSN(20,12);
5728 Bool atRN = INSN(11,11) == 0;
5729 UInt nn = INSN(9,5);
5730 UInt tt = INSN(4,0);
5731 IRTemp tRN = newTemp(Ity_I64);
5732 IRTemp tEA = newTemp(Ity_I64);
5733 IRTemp tTA = IRTemp_INVALID;
5734 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5735 ULong simm9 = sx_to_64(imm9, 9);
5736 assign(tRN, getIReg64orSP(nn));
5737 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5738 tTA = atRN ? tRN : tEA;
5740 /* Do early writeback for the cases typified by
5741 str d8, [sp, #-32]!
5742 str d10, [sp, #-128]!
5743 str q1, [sp, #-32]!
5744 for the same reasons as described in a similar comment in the
5745 "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
5747 Bool earlyWBack
5748 = !atRN && !isLD && (ty == Ity_F64 || ty == Ity_V128)
5749 && nn == 31 && ((Long)simm9) < 0;
5751 if (earlyWBack)
5752 putIReg64orSP(nn, mkexpr(tEA));
5754 if (isLD) {
5755 if (szLg2 < 4) {
5756 putQReg128(tt, mkV128(0x0000));
5758 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5759 } else {
5760 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5763 if (!earlyWBack)
5764 putIReg64orSP(nn, mkexpr(tEA));
5766 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5767 isLD ? "ldr" : "str",
5768 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5769 return True;
5772 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5773 /* 31 29 23 20 11 9 4
5774 00 111 100 01 0 imm9 00 n t LDUR Bt, [Xn|SP, #simm]
5775 01 111 100 01 0 imm9 00 n t LDUR Ht, [Xn|SP, #simm]
5776 10 111 100 01 0 imm9 00 n t LDUR St, [Xn|SP, #simm]
5777 11 111 100 01 0 imm9 00 n t LDUR Dt, [Xn|SP, #simm]
5778 00 111 100 11 0 imm9 00 n t LDUR Qt, [Xn|SP, #simm]
5780 00 111 100 00 0 imm9 00 n t STUR Bt, [Xn|SP, #simm]
5781 01 111 100 00 0 imm9 00 n t STUR Ht, [Xn|SP, #simm]
5782 10 111 100 00 0 imm9 00 n t STUR St, [Xn|SP, #simm]
5783 11 111 100 00 0 imm9 00 n t STUR Dt, [Xn|SP, #simm]
5784 00 111 100 10 0 imm9 00 n t STUR Qt, [Xn|SP, #simm]
5786 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5787 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5788 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5789 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5790 Bool isLD = INSN(22,22) == 1;
5791 UInt imm9 = INSN(20,12);
5792 UInt nn = INSN(9,5);
5793 UInt tt = INSN(4,0);
5794 ULong simm9 = sx_to_64(imm9, 9);
5795 IRTemp tEA = newTemp(Ity_I64);
5796 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5797 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5798 if (isLD) {
5799 if (szLg2 < 4) {
5800 putQReg128(tt, mkV128(0x0000));
5802 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5803 } else {
5804 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5806 DIP("%s %s, [%s, #%lld]\n",
5807 isLD ? "ldur" : "stur",
5808 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5809 return True;
5812 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5813 /* 31 29 23 4
5814 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5815 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5816 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5818 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5819 UInt szB = 4 << INSN(31,30);
5820 UInt imm19 = INSN(23,5);
5821 UInt tt = INSN(4,0);
5822 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5823 IRType ty = preferredVectorSubTypeFromSize(szB);
5824 putQReg128(tt, mkV128(0x0000));
5825 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5826 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5827 return True;
5830 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */
5831 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5832 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5833 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5834 /* 31 29 26 22 21 20 15 11 9 4
5836 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5837 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5839 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5840 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5842 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5843 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5845 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
5846 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
5848 T = defined by Q and sz in the normal way
5849 step = if m == 11111 then transfer-size else Xm
5850 xx = case L of 1 -> LD ; 0 -> ST
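/* Worked decode example (illustrative only):
   "ld2 {v0.16b, v1.16b}, [x3], #32" has q=1, L=1, opc=1000 (2 regs),
   m=11111, sz=00, n=3, t=0; the post-index step is the transfer
   size, 2 * 16 = 32 bytes. */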
5852 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5853 && INSN(21,21) == 0) {
5854 Bool bitQ = INSN(30,30);
5855 Bool isPX = INSN(23,23) == 1;
5856 Bool isLD = INSN(22,22) == 1;
5857 UInt mm = INSN(20,16);
5858 UInt opc = INSN(15,12);
5859 UInt sz = INSN(11,10);
5860 UInt nn = INSN(9,5);
5861 UInt tt = INSN(4,0);
5862 Bool isQ = bitQ == 1;
5863 Bool is1d = sz == BITS2(1,1) && !isQ;
5864 UInt nRegs = 0;
5865 switch (opc) {
5866 case BITS4(0,0,0,0): nRegs = 4; break;
5867 case BITS4(0,1,0,0): nRegs = 3; break;
5868 case BITS4(1,0,0,0): nRegs = 2; break;
5869 case BITS4(0,1,1,1): nRegs = 1; break;
5870 default: break;
5873 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5874 If we see it, set nRegs to 0 so as to cause the next conditional
5875 to fail. */
5876 if (!isPX && mm != 0)
5877 nRegs = 0;
5879 if (nRegs == 1 /* .1d is allowed */
5880 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5882 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5884 /* Generate the transfer address (TA) and if necessary the
5885 writeback address (WB) */
5886 IRTemp tTA = newTemp(Ity_I64);
5887 assign(tTA, getIReg64orSP(nn));
5888 if (nn == 31) { /* FIXME generate stack alignment check */ }
5889 IRTemp tWB = IRTemp_INVALID;
5890 if (isPX) {
5891 tWB = newTemp(Ity_I64);
5892 assign(tWB, binop(Iop_Add64,
5893 mkexpr(tTA),
5894 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5895 : getIReg64orZR(mm)));
5898 /* -- BEGIN generate the transfers -- */
5900 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5901 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5902 switch (nRegs) {
5903 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5904 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5905 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5906 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5907 default: vassert(0);
5910 /* -- Multiple 128 or 64 bit stores -- */
5911 if (!isLD) {
5912 switch (nRegs) {
5913 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5914 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5915 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5916 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5917 default: vassert(0);
5919 switch (nRegs) {
5920 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5921 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5922 break;
5923 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5924 (&i0, &i1, &i2, sz, u0, u1, u2);
5925 break;
5926 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5927 (&i0, &i1, sz, u0, u1);
5928 break;
5929 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5930 (&i0, sz, u0);
5931 break;
5932 default: vassert(0);
5934 # define MAYBE_NARROW_TO_64(_expr) \
5935 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5936 UInt step = isQ ? 16 : 8;
5937 switch (nRegs) {
5938 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5939 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5940 /* fallthru */
5941 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5942 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5943 /* fallthru */
5944 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5945 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5946 /* fallthru */
5947 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5948 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5949 break;
5950 default: vassert(0);
5952 # undef MAYBE_NARROW_TO_64
5955 /* -- Multiple 128 or 64 bit loads -- */
5956 else /* isLD */ {
5957 UInt step = isQ ? 16 : 8;
5958 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5959 # define MAYBE_WIDEN_FROM_64(_expr) \
5960 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5961 switch (nRegs) {
5962 case 4:
5963 assign(i3, MAYBE_WIDEN_FROM_64(
5964 loadLE(loadTy,
5965 binop(Iop_Add64, mkexpr(tTA),
5966 mkU64(3 * step)))));
5967 /* fallthru */
5968 case 3:
5969 assign(i2, MAYBE_WIDEN_FROM_64(
5970 loadLE(loadTy,
5971 binop(Iop_Add64, mkexpr(tTA),
5972 mkU64(2 * step)))));
5973 /* fallthru */
5974 case 2:
5975 assign(i1, MAYBE_WIDEN_FROM_64(
5976 loadLE(loadTy,
5977 binop(Iop_Add64, mkexpr(tTA),
5978 mkU64(1 * step)))));
5979 /* fallthru */
5980 case 1:
5981 assign(i0, MAYBE_WIDEN_FROM_64(
5982 loadLE(loadTy,
5983 binop(Iop_Add64, mkexpr(tTA),
5984 mkU64(0 * step)))));
5985 break;
5986 default:
5987 vassert(0);
5989 # undef MAYBE_WIDEN_FROM_64
5990 switch (nRegs) {
5991 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5992 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5993 break;
5994 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5995 (&u0, &u1, &u2, sz, i0, i1, i2);
5996 break;
5997 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5998 (&u0, &u1, sz, i0, i1);
5999 break;
6000 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
6001 (&u0, sz, i0);
6002 break;
6003 default: vassert(0);
6005 switch (nRegs) {
6006 case 4: putQReg128( (tt+3) % 32,
6007 math_MAYBE_ZERO_HI64(bitQ, u3));
6008 /* fallthru */
6009 case 3: putQReg128( (tt+2) % 32,
6010 math_MAYBE_ZERO_HI64(bitQ, u2));
6011 /* fallthru */
6012 case 2: putQReg128( (tt+1) % 32,
6013 math_MAYBE_ZERO_HI64(bitQ, u1));
6014 /* fallthru */
6015 case 1: putQReg128( (tt+0) % 32,
6016 math_MAYBE_ZERO_HI64(bitQ, u0));
6017 break;
6018 default: vassert(0);
6022 /* -- END generate the transfers -- */
6024 /* Do the writeback, if necessary */
6025 if (isPX) {
6026 putIReg64orSP(nn, mkexpr(tWB));
6029 HChar pxStr[20];
6030 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6031 if (isPX) {
6032 if (mm == BITS5(1,1,1,1,1))
6033 vex_sprintf(pxStr, ", #%u", xferSzB);
6034 else
6035 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6037 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6038 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6039 isLD ? "ld" : "st", nRegs,
6040 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6041 pxStr);
6043 return True;
6045 /* else fall through */
6048 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
6049 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
6050 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
6051 /* 31 29 26 22 21 20 15 11 9 4
6053 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6054 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6056 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6057 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6059 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6060 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6062 T = defined by Q and sz in the normal way
6063 step = if m == 11111 then transfer-size else Xm
6064 xx = case L of 1 -> LD ; 0 -> ST
6066 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6067 && INSN(21,21) == 0) {
6068 Bool bitQ = INSN(30,30);
6069 Bool isPX = INSN(23,23) == 1;
6070 Bool isLD = INSN(22,22) == 1;
6071 UInt mm = INSN(20,16);
6072 UInt opc = INSN(15,12);
6073 UInt sz = INSN(11,10);
6074 UInt nn = INSN(9,5);
6075 UInt tt = INSN(4,0);
6076 Bool isQ = bitQ == 1;
6077 UInt nRegs = 0;
6078 switch (opc) {
6079 case BITS4(0,0,1,0): nRegs = 4; break;
6080 case BITS4(0,1,1,0): nRegs = 3; break;
6081 case BITS4(1,0,1,0): nRegs = 2; break;
6082 default: break;
6085 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6086 If we see it, set nRegs to 0 so as to cause the next conditional
6087 to fail. */
6088 if (!isPX && mm != 0)
6089 nRegs = 0;
6091 if (nRegs >= 2 && nRegs <= 4) {
6093 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6095 /* Generate the transfer address (TA) and if necessary the
6096 writeback address (WB) */
6097 IRTemp tTA = newTemp(Ity_I64);
6098 assign(tTA, getIReg64orSP(nn));
6099 if (nn == 31) { /* FIXME generate stack alignment check */ }
6100 IRTemp tWB = IRTemp_INVALID;
6101 if (isPX) {
6102 tWB = newTemp(Ity_I64);
6103 assign(tWB, binop(Iop_Add64,
6104 mkexpr(tTA),
6105 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6106 : getIReg64orZR(mm)));
6109 /* -- BEGIN generate the transfers -- */
6111 IRTemp u0, u1, u2, u3;
6112 u0 = u1 = u2 = u3 = IRTemp_INVALID;
6113 switch (nRegs) {
6114 case 4: u3 = newTempV128(); /* fallthru */
6115 case 3: u2 = newTempV128(); /* fallthru */
6116 case 2: u1 = newTempV128();
6117 u0 = newTempV128(); break;
6118 default: vassert(0);
6121 /* -- Multiple 128 or 64 bit stores -- */
6122 if (!isLD) {
6123 switch (nRegs) {
6124 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6125 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6126 case 2: assign(u1, getQReg128((tt+1) % 32));
6127 assign(u0, getQReg128((tt+0) % 32)); break;
6128 default: vassert(0);
6130 # define MAYBE_NARROW_TO_64(_expr) \
6131 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6132 UInt step = isQ ? 16 : 8;
6133 switch (nRegs) {
6134 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6135 MAYBE_NARROW_TO_64(mkexpr(u3)) );
6136 /* fallthru */
6137 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6138 MAYBE_NARROW_TO_64(mkexpr(u2)) );
6139 /* fallthru */
6140 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6141 MAYBE_NARROW_TO_64(mkexpr(u1)) );
6142 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6143 MAYBE_NARROW_TO_64(mkexpr(u0)) );
6144 break;
6145 default: vassert(0);
6147 # undef MAYBE_NARROW_TO_64
6150 /* -- Multiple 128 or 64 bit loads -- */
6151 else /* isLD */ {
6152 UInt step = isQ ? 16 : 8;
6153 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6154 # define MAYBE_WIDEN_FROM_64(_expr) \
6155 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6156 switch (nRegs) {
6157 case 4:
6158 assign(u3, MAYBE_WIDEN_FROM_64(
6159 loadLE(loadTy,
6160 binop(Iop_Add64, mkexpr(tTA),
6161 mkU64(3 * step)))));
6162 /* fallthru */
6163 case 3:
6164 assign(u2, MAYBE_WIDEN_FROM_64(
6165 loadLE(loadTy,
6166 binop(Iop_Add64, mkexpr(tTA),
6167 mkU64(2 * step)))));
6168 /* fallthru */
6169 case 2:
6170 assign(u1, MAYBE_WIDEN_FROM_64(
6171 loadLE(loadTy,
6172 binop(Iop_Add64, mkexpr(tTA),
6173 mkU64(1 * step)))));
6174 assign(u0, MAYBE_WIDEN_FROM_64(
6175 loadLE(loadTy,
6176 binop(Iop_Add64, mkexpr(tTA),
6177 mkU64(0 * step)))));
6178 break;
6179 default:
6180 vassert(0);
6182 # undef MAYBE_WIDEN_FROM_64
6183 switch (nRegs) {
6184 case 4: putQReg128( (tt+3) % 32,
6185 math_MAYBE_ZERO_HI64(bitQ, u3));
6186 /* fallthru */
6187 case 3: putQReg128( (tt+2) % 32,
6188 math_MAYBE_ZERO_HI64(bitQ, u2));
6189 /* fallthru */
6190 case 2: putQReg128( (tt+1) % 32,
6191 math_MAYBE_ZERO_HI64(bitQ, u1));
6192 putQReg128( (tt+0) % 32,
6193 math_MAYBE_ZERO_HI64(bitQ, u0));
6194 break;
6195 default: vassert(0);
6199 /* -- END generate the transfers -- */
6201 /* Do the writeback, if necessary */
6202 if (isPX) {
6203 putIReg64orSP(nn, mkexpr(tWB));
6206 HChar pxStr[20];
6207 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6208 if (isPX) {
6209 if (mm == BITS5(1,1,1,1,1))
6210 vex_sprintf(pxStr, ", #%u", xferSzB);
6211 else
6212 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6214 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6215 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6216 isLD ? "ld" : "st",
6217 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6218 pxStr);
6220 return True;
6222 /* else fall through */
6225 /* ---------- LD1R (single structure, replicate) ---------- */
6226 /* ---------- LD2R (single structure, replicate) ---------- */
6227 /* ---------- LD3R (single structure, replicate) ---------- */
6228 /* ---------- LD4R (single structure, replicate) ---------- */
6229 /* 31 29 22 20 15 11 9 4
6230 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6231 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6233 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6234 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6236 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6237 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6239 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6240 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6242 step = if m == 11111 then transfer-size else Xm
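/* Worked decode example (illustrative only):
   "ld2r {v4.8h, v5.8h}, [x2], #4" has q=1, nRegs=2, sz=01
   (laneSzB=2) and m=11111, so the post-index step is xferSzB = 4;
   each loaded halfword is replicated across all 8 lanes of its
   destination register. */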
6244 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6245 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6246 && INSN(12,12) == 0) {
6247 UInt bitQ = INSN(30,30);
6248 Bool isPX = INSN(23,23) == 1;
6249 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6250 UInt mm = INSN(20,16);
6251 UInt sz = INSN(11,10);
6252 UInt nn = INSN(9,5);
6253 UInt tt = INSN(4,0);
6255 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6256 if (isPX || mm == 0) {
6258 IRType ty = integerIRTypeOfSize(1 << sz);
6260 UInt laneSzB = 1 << sz;
6261 UInt xferSzB = laneSzB * nRegs;
6263 /* Generate the transfer address (TA) and if necessary the
6264 writeback address (WB) */
6265 IRTemp tTA = newTemp(Ity_I64);
6266 assign(tTA, getIReg64orSP(nn));
6267 if (nn == 31) { /* FIXME generate stack alignment check */ }
6268 IRTemp tWB = IRTemp_INVALID;
6269 if (isPX) {
6270 tWB = newTemp(Ity_I64);
6271 assign(tWB, binop(Iop_Add64,
6272 mkexpr(tTA),
6273 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6274 : getIReg64orZR(mm)));
6277 /* Do the writeback, if necessary */
6278 if (isPX) {
6279 putIReg64orSP(nn, mkexpr(tWB));
6282 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6283 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6284 switch (nRegs) {
6285 case 4:
6286 e3 = newTemp(ty);
6287 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6288 mkU64(3 * laneSzB))));
6289 v3 = math_DUP_TO_V128(e3, ty);
6290 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6291 /* fallthrough */
6292 case 3:
6293 e2 = newTemp(ty);
6294 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6295 mkU64(2 * laneSzB))));
6296 v2 = math_DUP_TO_V128(e2, ty);
6297 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6298 /* fallthrough */
6299 case 2:
6300 e1 = newTemp(ty);
6301 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6302 mkU64(1 * laneSzB))));
6303 v1 = math_DUP_TO_V128(e1, ty);
6304 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6305 /* fallthrough */
6306 case 1:
6307 e0 = newTemp(ty);
6308 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6309 mkU64(0 * laneSzB))));
6310 v0 = math_DUP_TO_V128(e0, ty);
6311 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6312 break;
6313 default:
6314 vassert(0);
6317 HChar pxStr[20];
6318 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6319 if (isPX) {
6320 if (mm == BITS5(1,1,1,1,1))
6321 vex_sprintf(pxStr, ", #%u", xferSzB);
6322 else
6323 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6325 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6326 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6327 nRegs,
6328 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6329 pxStr);
6331 return True;
6333 /* else fall through */
6336 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6337 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6338 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6339 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6340 /* 31 29 22 21 20 15 11 9 4
6341 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6342 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6344 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6345 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6347 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6348 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6350 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6351 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6353 step = if m == 11111 then transfer-size else Xm
6354 op = case L of 1 -> LD ; 0 -> ST
6356 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6357 01:b:b:b0 -> 2, bbb
6358 10:b:b:00 -> 4, bb
6359 10:b:0:01 -> 8, b
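/* Worked decode example (illustrative only): "ld1 {v2.s}[3], [x0]"
   has xx=10, q=1, S=1, sz=00, so xx_q_S_sz == 0x2C, giving
   laneSzB = 4 and ix = 3 in the decode below. */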
6361 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6362 UInt bitQ = INSN(30,30);
6363 Bool isPX = INSN(23,23) == 1;
6364 Bool isLD = INSN(22,22) == 1;
6365 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6366 UInt mm = INSN(20,16);
6367 UInt xx = INSN(15,14);
6368 UInt bitS = INSN(12,12);
6369 UInt sz = INSN(11,10);
6370 UInt nn = INSN(9,5);
6371 UInt tt = INSN(4,0);
6373 Bool valid = True;
6375 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6376 if (!isPX && mm != 0)
6377 valid = False;
6379 UInt laneSzB = 0; /* invalid */
6380 UInt ix = 16; /* invalid */
6382 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6383 switch (xx_q_S_sz) {
6384 case 0x00: case 0x01: case 0x02: case 0x03:
6385 case 0x04: case 0x05: case 0x06: case 0x07:
6386 case 0x08: case 0x09: case 0x0A: case 0x0B:
6387 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6388 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6389 break;
6390 case 0x10: case 0x12: case 0x14: case 0x16:
6391 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6392 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6393 break;
6394 case 0x20: case 0x24: case 0x28: case 0x2C:
6395 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6396 break;
6397 case 0x21: case 0x29:
6398 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6399 break;
6400 default:
6401 break;
6404 if (valid && laneSzB != 0) {
6406 IRType ty = integerIRTypeOfSize(laneSzB);
6407 UInt xferSzB = laneSzB * nRegs;
6409 /* Generate the transfer address (TA) and if necessary the
6410 writeback address (WB) */
6411 IRTemp tTA = newTemp(Ity_I64);
6412 assign(tTA, getIReg64orSP(nn));
6413 if (nn == 31) { /* FIXME generate stack alignment check */ }
6414 IRTemp tWB = IRTemp_INVALID;
6415 if (isPX) {
6416 tWB = newTemp(Ity_I64);
6417 assign(tWB, binop(Iop_Add64,
6418 mkexpr(tTA),
6419 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6420 : getIReg64orZR(mm)));
6423 /* Do the writeback, if necessary */
6424 if (isPX) {
6425 putIReg64orSP(nn, mkexpr(tWB));
6428 switch (nRegs) {
6429 case 4: {
6430 IRExpr* addr
6431 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6432 if (isLD) {
6433 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6434 } else {
6435 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6438 /* fallthrough */
6439 case 3: {
6440 IRExpr* addr
6441 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6442 if (isLD) {
6443 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6444 } else {
6445 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6448 /* fallthrough */
6449 case 2: {
6450 IRExpr* addr
6451 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6452 if (isLD) {
6453 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6454 } else {
6455 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6458 /* fallthrough */
6459 case 1: {
6460 IRExpr* addr
6461 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6462 if (isLD) {
6463 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6464 } else {
6465 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6467 break;
6469 default:
6470 vassert(0);
6473 HChar pxStr[20];
6474 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6475 if (isPX) {
6476 if (mm == BITS5(1,1,1,1,1))
6477 vex_sprintf(pxStr, ", #%u", xferSzB);
6478 else
6479 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6481 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6482 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6483 isLD ? "ld" : "st", nRegs,
6484 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6485 ix, nameIReg64orSP(nn), pxStr);
6487 return True;
6489 /* else fall through */
6492 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6493 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6494 /* 31 29 23 20 14 9 4
6495 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6496 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6497 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6498 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6500 /* For the "standard" implementation we pass through the LL and SC to
6501 the host. For the "fallback" implementation, for details see
6502 https://bugs.kde.org/show_bug.cgi?id=344524 and
6503 https://bugs.kde.org/show_bug.cgi?id=369459,
6504 but in short:
6506 LoadLinked(addr)
6507 gs.LLsize = load_size // 1, 2, 4 or 8
6508 gs.LLaddr = addr
6509 gs.LLdata = zeroExtend(*addr)
6511 StoreCond(addr, data)
6512 tmp_LLsize = gs.LLsize
6513 gs.LLsize = 0 // "no transaction"
6514 if tmp_LLsize != store_size -> fail
6515 if addr != gs.LLaddr -> fail
6516 if zeroExtend(*addr) != gs.LLdata -> fail
6517 cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6518 if !cas_ok -> fail
6519 succeed
6521 When thread scheduled
6522 gs.LLsize = 0 // "no transaction"
6523 (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
6524 has to do this bit)
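/* Illustrative walk-through of the fallback scheme (not actual code)
   for a guest exclusive pair on a word:

      ldxr w1, [x0]      ; gs.LLsize := 4, gs.LLaddr := x0,
                         ; gs.LLdata := zeroExtend(*x0)
      stxr w3, w2, [x0]  ; w3 := 1 (failure) unless the size, address
                         ; and data checks pass and the CAS succeeds;
                         ; any intervening thread switch clears
                         ; gs.LLsize and so forces the store to fail. */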
6526 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6527 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6528 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6529 UInt szBlg2 = INSN(31,30);
6530 Bool isLD = INSN(22,22) == 1;
6531 Bool isAcqOrRel = INSN(15,15) == 1;
6532 UInt ss = INSN(20,16);
6533 UInt nn = INSN(9,5);
6534 UInt tt = INSN(4,0);
6536 vassert(szBlg2 < 4);
6537 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6538 IRType ty = integerIRTypeOfSize(szB);
6539 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6541 IRTemp ea = newTemp(Ity_I64);
6542 assign(ea, getIReg64orSP(nn));
6543 /* FIXME generate check that ea is szB-aligned */
6545 if (isLD && ss == BITS5(1,1,1,1,1)) {
6546 IRTemp res = newTemp(ty);
6547 if (abiinfo->guest__use_fallback_LLSC) {
6548 // Do the load first so we don't update any guest state
6549 // if it faults.
6550 IRTemp loaded_data64 = newTemp(Ity_I64);
6551 assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
6552 stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
6553 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6554 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
6555 putIReg64orZR(tt, mkexpr(loaded_data64));
6556 } else {
6557 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6558 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6560 if (isAcqOrRel) {
6561 stmt(IRStmt_MBE(Imbe_Fence));
6563 DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6564 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6565 abiinfo->guest__use_fallback_LLSC
6566 ? "(fallback implementation)" : "");
6567 return True;
6569 if (!isLD) {
6570 if (isAcqOrRel) {
6571 stmt(IRStmt_MBE(Imbe_Fence));
6573 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6574 if (abiinfo->guest__use_fallback_LLSC) {
6575 // This is really ugly, since we don't have any way to do
6576 // proper if-then-else. First, set up as if the SC failed,
6577 // and jump forwards if it really has failed.
6579 // Continuation address
6580 IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6582 // "the SC failed". Any non-zero value means failure.
6583 putIReg64orZR(ss, mkU64(1));
6585 IRTemp tmp_LLsize = newTemp(Ity_I64);
6586 assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6587 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6589 // Fail if no or wrong-size transaction
6590 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
6591 stmt( IRStmt_Exit(
6592 binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
6593 Ijk_Boring, nia, OFFB_PC
6595 // Fail if the address doesn't match the LL address
6596 stmt( IRStmt_Exit(
6597 binop(Iop_CmpNE64, mkexpr(ea),
6598 IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6599 Ijk_Boring, nia, OFFB_PC
6601 // Fail if the data doesn't match the LL data
6602 IRTemp llsc_data64 = newTemp(Ity_I64);
6603 assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
6604 stmt( IRStmt_Exit(
6605 binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
6606 mkexpr(llsc_data64)),
6607 Ijk_Boring, nia, OFFB_PC
6609 // Try to CAS the new value in.
6610 IRTemp old = newTemp(ty);
6611 IRTemp expd = newTemp(ty);
6612 assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
6613 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6614 Iend_LE, mkexpr(ea),
6615 /*expdHi*/NULL, mkexpr(expd),
6616 /*dataHi*/NULL, data
6617 )));
6618 // Fail if the CAS failed (viz, old != expd)
6619 stmt( IRStmt_Exit(
6620 binop(Iop_CmpNE64,
6621 widenUto64(ty, mkexpr(old)),
6622 widenUto64(ty, mkexpr(expd))),
6623 Ijk_Boring, nia, OFFB_PC
6625 // Otherwise we succeeded (!)
6626 putIReg64orZR(ss, mkU64(0));
6627 } else {
6628 IRTemp res = newTemp(Ity_I1);
6629 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6630 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6631 Need to set rS to 1 on failure, 0 on success. */
6632 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6633 mkU64(1)));
6635 DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6636 nameIRegOrZR(False, ss),
6637 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6638 abiinfo->guest__use_fallback_LLSC
6639 ? "(fallback implementation)" : "");
6640 return True;
6642 /* else fall through */
6645 /* ------------------ LDA{R,RH,RB} ------------------ */
6646 /* ------------------ STL{R,RH,RB} ------------------ */
6647 /* 31 29 23 20 14 9 4
6648 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6649 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6651 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6652 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6653 UInt szBlg2 = INSN(31,30);
6654 Bool isLD = INSN(22,22) == 1;
6655 UInt nn = INSN(9,5);
6656 UInt tt = INSN(4,0);
6658 vassert(szBlg2 < 4);
6659 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6660 IRType ty = integerIRTypeOfSize(szB);
6661 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6663 IRTemp ea = newTemp(Ity_I64);
6664 assign(ea, getIReg64orSP(nn));
6665 /* FIXME generate check that ea is szB-aligned */
6667 if (isLD) {
6668 IRTemp res = newTemp(ty);
6669 assign(res, loadLE(ty, mkexpr(ea)));
6670 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6671 stmt(IRStmt_MBE(Imbe_Fence));
6672 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6673 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6674 } else {
6675 stmt(IRStmt_MBE(Imbe_Fence));
6676 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6677 storeLE(mkexpr(ea), data);
6678 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6679 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6681 return True;
6684 /* The PRFM cases that follow possibly allow Rt values (the
6685 prefetch operation) which are not allowed by the documentation.
6686 This should be looked into. */
6687 /* ------------------ PRFM (immediate) ------------------ */
6688 /* 31 21 9 4
6689 11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
6691 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6692 UInt imm12 = INSN(21,10);
6693 UInt nn = INSN(9,5);
6694 UInt tt = INSN(4,0);
6695 /* Generating any IR here is pointless, except for documentation
6696 purposes, as it will get optimised away later. */
6697 IRTemp ea = newTemp(Ity_I64);
6698 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6699 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6700 return True;
6703 /* ------------------ PRFM (register) ------------------ */
6704 /* 31 29 22 20 15 12 11 9 4
6705 11 1110001 01 Rm opt S 10 Rn Rt PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6707 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6708 && INSN(11,10) == BITS2(1,0)) {
6709 HChar dis_buf[64];
6710 UInt tt = INSN(4,0);
6711 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6712 if (ea != IRTemp_INVALID) {
6713 /* No actual code to generate. */
6714 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6715 return True;
6719 /* ------------------ PRFM (unscaled offset) ------------------ */
6720 /* 31 29 22 20 11 9 4
6721 11 1110001 00 imm9 00 Rn Rt PRFM prfop=Rt, [Xn|SP, #simm]
6723 if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
6724 && INSN(11,10) == BITS2(0,0)) {
6725 ULong imm9 = INSN(20,12);
6726 UInt nn = INSN(9,5);
6727 UInt tt = INSN(4,0);
6728 ULong offset = sx_to_64(imm9, 9);
6729 IRTemp ea = newTemp(Ity_I64);
6730 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
6731 /* No actual code to generate. */
6732 DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
6733 return True;
6736 /* ---------------- ARMv8.1-LSE: Atomic Memory Operations ---------------- */
6737 /* 31 29 23 22 21 20 15 11 9 4
6738 sz 111000 A R 1 s 0000 00 n t LDADD{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6739 sz 111000 A R 1 s 0001 00 n t LDCLR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6740 sz 111000 A R 1 s 0010 00 n t LDEOR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6741 sz 111000 A R 1 s 0011 00 n t LDSET{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6742 sz 111000 A R 1 s 0100 00 n t LDSMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6743 sz 111000 A R 1 s 0101 00 n t LDSMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6744 sz 111000 A R 1 s 0110 00 n t LDUMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6745 sz 111000 A R 1 s 0111 00 n t LDUMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6746 sz 111000 A R 1 s 1000 00 n t SWP{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6748 if (INSN(29,24) == BITS6(1,1,1,0,0,0)
6749 && INSN(21,21) == 1
6750 && (INSN(15,12) <= BITS4(1,0,0,0))
6751 && INSN(11,10) == BITS2(0,0)) {
6752 UInt szBlg2 = INSN(31,30);
6753 Bool isAcq = INSN(23,23) == 1;
6754 Bool isRel = INSN(22,22) == 1;
6755 UInt ss = INSN(20,16);
6756 UInt opc = INSN(15,12);
6757 UInt nn = INSN(9,5);
6758 UInt tt = INSN(4,0);
6760 const HChar* nm = NULL;
6761 const HChar* suffix[4] = { "b", "h", "", "" };
6763 vassert(szBlg2 < 4);
6764 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 bytes*/
6765 IRType ty = integerIRTypeOfSize(szB);
6766 Bool is64 = szB == 8;
6767 Bool isSigned = (opc == 4) || (opc == 5) /*smax || smin*/;
6769 // IR used to emulate these atomic memory ops:
6770 // 1) barrier
6771 // 2) load
6772 // 3) widen operands and do arithmetic/logic op
6773 // 4) cas to see if target memory updated
6774 // 5) barrier
6775 // 6) repeat from 1) if cas says target memory not updated
6776 // 7) update register
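// Illustrative outline (not literal IR) for a 4-byte "ldadd ws, wt, [xn]":
//    orig = LDle:I32(ea)
//    res  = Add64(widenUto64(orig), widenUto64(ws))
//    old  = CASle(ea, expd=orig, data=narrowFrom64(res))
//    if (old != orig) goto <this insn>   // retry
//    wt   = widenUto64(old)
// with the acquire/release fences placed as described in steps 1) and 5).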
6778 IRTemp ea = newTemp(Ity_I64);
6779 assign(ea, getIReg64orSP(nn));
6781 // Insert barrier before loading for acquire and acquire-release variants:
6782 // A and AL.
6783 if (isAcq && (tt != 31))
6784 stmt(IRStmt_MBE(Imbe_Fence));
6786 // Load LHS from memory, RHS from register.
6787 IRTemp orig = newTemp(ty);
6788 assign(orig, loadLE(ty, mkexpr(ea)));
6789 IRExpr *lhs = mkexpr(orig);
6790 IRExpr *rhs = narrowFrom64(ty, getIReg64orZR(ss));
6791 IRExpr *res = NULL;
6793 lhs = isSigned ? widenSto64(ty, lhs) : widenUto64(ty, lhs);
6794 rhs = isSigned ? widenSto64(ty, rhs) : widenUto64(ty, rhs);
6796 // Perform the operation.
6797 switch (opc) {
6798 case 0:
6799 nm = "ldadd";
6800 res = binop(Iop_Add64, lhs, rhs);
6801 break;
6802 case 1:
6803 nm = "ldclr";
6804 res = binop(Iop_And64, lhs, unop(mkNOT(Ity_I64), rhs));
6805 break;
6806 case 2:
6807 nm = "ldeor";
6808 res = binop(Iop_Xor64, lhs, rhs);
6809 break;
6810 case 3:
6811 nm = "ldset";
6812 res = binop(Iop_Or64, lhs, rhs);
6813 break;
6814 case 4:
6815 nm = "ldsmax";
6816 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), rhs, lhs);
6817 break;
6818 case 5:
6819 nm = "ldsmin";
6820 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), lhs, rhs);
6821 break;
6822 case 6:
6823 nm = "ldumax";
6824 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), rhs, lhs);
6825 break;
6826 case 7:
6827 nm = "ldumin";
6828 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), lhs, rhs);
6829 break;
6830 case 8:
6831 nm = "swp";
6832 res = lhs;
6833 break;
6834 default:
6835 vassert(0);
6836 break;
6839 // Store the result back if LHS remains unchanged in memory.
6840 IRTemp old = newTemp(ty);
6841 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6842 Iend_LE, mkexpr(ea),
6843 /*expdHi*/NULL, mkexpr(orig),
6844 /*dataHi*/NULL, narrowFrom64(ty, res))) );
6846 // Insert barrier after storing for release and acquire-release variants:
6847 // L and AL.
6848 if (isRel)
6849 stmt(IRStmt_MBE(Imbe_Fence));
6851 // Retry if the CAS failed (i.e. when old != orig).
6852 IRConst* nia = IRConst_U64(guest_PC_curr_instr);
6853 stmt( IRStmt_Exit(
6854 binop(Iop_CasCmpNE64,
6855 widenUto64(ty, mkexpr(old)),
6856 widenUto64(ty, mkexpr(orig))),
6857 Ijk_Boring, nia, OFFB_PC ));
6858 // Otherwise we succeeded.
6859 putIReg64orZR(tt, widenUto64(ty, mkexpr(old)));
6861 DIP("%s%s%s%s %s, %s, [%s]\n", nm, isAcq ? "a" : "", isRel ? "l" : "",
6862 suffix[szBlg2], nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt),
6863 nameIReg64orSP(nn));
6864 return True;
6867 /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */
6868 /* 31 29 22 21 20 15 14 9 4
6869 sz 0010001 A 1 s R 11111 n t CAS{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6871 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6872 && INSN(21,21) == 1
6873 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6874 UInt szBlg2 = INSN(31,30);
6875 Bool isAcq = INSN(22,22) == 1;
6876 Bool isRel = INSN(15,15) == 1;
6877 UInt ss = INSN(20,16);
6878 UInt nn = INSN(9,5);
6879 UInt tt = INSN(4,0);
6881 const HChar* suffix[4] = { "b", "h", "", "" };
6883 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6884 IRType ty = integerIRTypeOfSize(szB);
6885 Bool is64 = szB == 8;
6887 IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
6888 IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
6890 if (isAcq)
6891 stmt(IRStmt_MBE(Imbe_Fence));
6893 // Do the compare-and-swap: the new value is stored only if memory
6893 // still holds the expected value; "old" receives what was there.
6894 IRTemp old = newTemp(ty);
6895 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6896 Iend_LE, getIReg64orSP(nn),
6897 /*expdHi*/NULL, exp,
6898 /*dataHi*/NULL, new)) );
6900 if (isRel)
6901 stmt(IRStmt_MBE(Imbe_Fence));
6903 putIReg64orZR(ss, widenUto64(ty, mkexpr(old)));
6904 DIP("cas%s%s%s %s, %s, [%s]\n",
6905 isAcq ? "a" : "", isRel ? "l" : "", suffix[szBlg2],
6906 nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
6907 return True;
6910 /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
6911 /* 31 30 29 22 21 20 15 14 9 4
6912 0 sz 0010000 A 1 s R 11111 n t CASP{,A}{,L} <Rs>, <Rt>, [<Xn|SP>]
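/* Worked example (illustrative only): "caspal w2, w3, w6, w7, [x1]"
   has sz=0 (32-bit), A=1, R=1, s=2, t=6, n=1.  Rs:Rs+1 (w2:w3) hold
   the expected pair and always receive the pair read from memory;
   Rt:Rt+1 (w6:w7) supply the new pair.  s and t must both be even. */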
6914 if (INSN(31,31) == 0
6915 && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6916 && INSN(21,21) == 1
6917 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6918 UInt is64 = INSN(30,30);
6919 Bool isAcq = INSN(22,22) == 1;
6920 Bool isRel = INSN(15,15) == 1;
6921 UInt ss = INSN(20,16);
6922 UInt nn = INSN(9,5);
6923 UInt tt = INSN(4,0);
6925 if ((ss & 0x1) || (tt & 0x1)) {
6926 /* undefined; fall through */
6927 } else {
6928 IRExpr *expLo = getIRegOrZR(is64, ss);
6929 IRExpr *expHi = getIRegOrZR(is64, ss + 1);
6930 IRExpr *newLo = getIRegOrZR(is64, tt);
6931 IRExpr *newHi = getIRegOrZR(is64, tt + 1);
6932 IRTemp oldLo = newTemp(is64 ? Ity_I64 : Ity_I32);
6933 IRTemp oldHi = newTemp(is64 ? Ity_I64 : Ity_I32);
6935 if (isAcq)
6936 stmt(IRStmt_MBE(Imbe_Fence));
6938 stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
6939 Iend_LE, getIReg64orSP(nn),
6940 expHi, expLo,
6941 newHi, newLo)) );
6943 if (isRel)
6944 stmt(IRStmt_MBE(Imbe_Fence));
6946 putIRegOrZR(is64, ss, mkexpr(oldLo));
6947 putIRegOrZR(is64, ss+1, mkexpr(oldHi));
6948 DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
6949 isAcq ? "a" : "", isRel ? "l" : "",
6950 nameIRegOrZR(is64, ss), nameIRegOrZR(is64, ss+1),
6951 nameIRegOrZR(is64, tt), nameIRegOrZR(is64, tt+1),
6952 nameIReg64orSP(nn));
6953 return True;
6957 if (sigill_diag) {
6958 vex_printf("ARM64 front end: load_store\n");
6961 return False;
6962 # undef INSN
6966 /*------------------------------------------------------------*/
6967 /*--- Control flow and misc instructions ---*/
6968 /*------------------------------------------------------------*/
6970 static
6971 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6972 const VexArchInfo* archinfo,
6973 const VexAbiInfo* abiinfo, Bool sigill_diag)
6975 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6977 /* ---------------------- B cond ----------------------- */
6978 /* 31 24 4 3
6979 0101010 0 imm19 0 cond */
6980 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6981 UInt cond = INSN(3,0);
6982 ULong uimm64 = INSN(23,5) << 2;
6983 Long simm64 = (Long)sx_to_64(uimm64, 21);
6984 vassert(dres->whatNext == Dis_Continue);
6985 vassert(dres->len == 4);
6986 vassert(dres->jk_StopHere == Ijk_INVALID);
6987 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6988 Ijk_Boring,
6989 IRConst_U64(guest_PC_curr_instr + simm64),
6990 OFFB_PC) );
6991 putPC(mkU64(guest_PC_curr_instr + 4));
6992 dres->whatNext = Dis_StopHere;
6993 dres->jk_StopHere = Ijk_Boring;
6994 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6995 return True;
6998 /* -------------------- B{L} uncond -------------------- */
6999 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
7000 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
7001 100101 imm26 BL (PC + sxTo64(imm26 << 2))
7003 UInt bLink = INSN(31,31);
7004 ULong uimm64 = INSN(25,0) << 2;
7005 Long simm64 = (Long)sx_to_64(uimm64, 28);
7006 if (bLink) {
7007 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
7009 putPC(mkU64(guest_PC_curr_instr + simm64));
7010 dres->whatNext = Dis_StopHere;
7011 dres->jk_StopHere = Ijk_Call;
7012 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
7013 guest_PC_curr_instr + simm64);
7014 return True;
7017 /* --------------------- B{L} reg --------------------- */
7018 /* 31 24 22 20 15 9 4
7019 1101011 00 10 11111 000000 nn 00000 RET Rn
7020 1101011 00 01 11111 000000 nn 00000 CALL Rn
7021 1101011 00 00 11111 000000 nn 00000 JMP Rn
7023 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
7024 && INSN(20,16) == BITS5(1,1,1,1,1)
7025 && INSN(15,10) == BITS6(0,0,0,0,0,0)
7026 && INSN(4,0) == BITS5(0,0,0,0,0)) {
7027 UInt branch_type = INSN(22,21);
7028 UInt nn = INSN(9,5);
7029 if (branch_type == BITS2(1,0) /* RET */) {
7030 putPC(getIReg64orZR(nn));
7031 dres->whatNext = Dis_StopHere;
7032 dres->jk_StopHere = Ijk_Ret;
7033 DIP("ret %s\n", nameIReg64orZR(nn));
7034 return True;
7036 if (branch_type == BITS2(0,1) /* CALL */) {
7037 IRTemp dst = newTemp(Ity_I64);
7038 assign(dst, getIReg64orZR(nn));
7039 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
7040 putPC(mkexpr(dst));
7041 dres->whatNext = Dis_StopHere;
7042 dres->jk_StopHere = Ijk_Call;
7043 DIP("blr %s\n", nameIReg64orZR(nn));
7044 return True;
7046 if (branch_type == BITS2(0,0) /* JMP */) {
7047 putPC(getIReg64orZR(nn));
7048 dres->whatNext = Dis_StopHere;
7049 dres->jk_StopHere = Ijk_Boring;
7050 DIP("jmp %s\n", nameIReg64orZR(nn));
7051 return True;
7055 /* -------------------- CB{N}Z -------------------- */
7056 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
7057 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
7059 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
7060 Bool is64 = INSN(31,31) == 1;
7061 Bool bIfZ = INSN(24,24) == 0;
7062 ULong uimm64 = INSN(23,5) << 2;
7063 UInt rT = INSN(4,0);
7064 Long simm64 = (Long)sx_to_64(uimm64, 21);
7065 IRExpr* cond = NULL;
7066 if (is64) {
7067 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
7068 getIReg64orZR(rT), mkU64(0));
7069 } else {
7070 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
7071 getIReg32orZR(rT), mkU32(0));
7073 stmt( IRStmt_Exit(cond,
7074 Ijk_Boring,
7075 IRConst_U64(guest_PC_curr_instr + simm64),
7076 OFFB_PC) );
7077 putPC(mkU64(guest_PC_curr_instr + 4));
7078 dres->whatNext = Dis_StopHere;
7079 dres->jk_StopHere = Ijk_Boring;
7080 DIP("cb%sz %s, 0x%llx\n",
7081 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
7082 guest_PC_curr_instr + simm64);
7083 return True;
7086 /* -------------------- TB{N}Z -------------------- */
7087 /* 31 30 24 23 18 5 4
7088 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
7089 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
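/* Worked decode example (illustrative only): "tbnz x3, #37, <target>"
   has b5=1 and b40=00101, so bitNo = (1 << 5) | 5 = 37; the branch
   is taken iff bit 37 of x3 is nonzero. */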
7091 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
7092 UInt b5 = INSN(31,31);
7093 Bool bIfZ = INSN(24,24) == 0;
7094 UInt b40 = INSN(23,19);
7095 UInt imm14 = INSN(18,5);
7096 UInt tt = INSN(4,0);
7097 UInt bitNo = (b5 << 5) | b40;
7098 ULong uimm64 = imm14 << 2;
7099 Long simm64 = sx_to_64(uimm64, 16);
7100 IRExpr* cond
7101 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
7102 binop(Iop_And64,
7103 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
7104 mkU64(1)),
7105 mkU64(0));
7106 stmt( IRStmt_Exit(cond,
7107 Ijk_Boring,
7108 IRConst_U64(guest_PC_curr_instr + simm64),
7109 OFFB_PC) );
7110 putPC(mkU64(guest_PC_curr_instr + 4));
7111 dres->whatNext = Dis_StopHere;
7112 dres->jk_StopHere = Ijk_Boring;
7113 DIP("tb%sz %s, #%u, 0x%llx\n",
7114 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
7115 guest_PC_curr_instr + simm64);
7116 return True;
7119 /* -------------------- SVC -------------------- */
7120 /* 11010100 000 imm16 000 01
7121 Don't bother with anything except the imm16==0 case.
7123 if (INSN(31,0) == 0xD4000001) {
7124 putPC(mkU64(guest_PC_curr_instr + 4));
7125 dres->whatNext = Dis_StopHere;
7126 dres->jk_StopHere = Ijk_Sys_syscall;
7127 DIP("svc #0\n");
7128 return True;
7131 /* ------------------ M{SR,RS} ------------------ */
7132 /* ---- Cases for TPIDR_EL0 ----
7133 0xD51BD0 010 Rt MSR tpidr_el0, rT
7134 0xD53BD0 010 Rt MRS rT, tpidr_el0
7136 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
7137 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
7138 Bool toSys = INSN(21,21) == 0;
7139 UInt tt = INSN(4,0);
7140 if (toSys) {
7141 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
7142 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
7143 } else {
7144 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
7145 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
7147 return True;
7149 /* ---- Cases for FPCR ----
7150 0xD51B44 000 Rt MSR fpcr, rT
7151 0xD53B44 000 Rt MRS rT, fpcr
7153 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
7154 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
7155 Bool toSys = INSN(21,21) == 0;
7156 UInt tt = INSN(4,0);
7157 if (toSys) {
7158 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
7159 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
7160 } else {
7161 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
7162 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
7164 return True;
7166 /* ---- Cases for FPSR ----
7167 0xD51B44 001 Rt MSR fpsr, rT
7168 0xD53B44 001 Rt MRS rT, fpsr
7169 The only part of this we model is FPSR.QC. All other bits
7170 are ignored when writing to it and RAZ when reading from it.
7172 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
7173 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
7174 Bool toSys = INSN(21,21) == 0;
7175 UInt tt = INSN(4,0);
7176 if (toSys) {
7177 /* Just deal with FPSR.QC. Make up a V128 value which is
7178 zero if Xt[27] is zero and any other value if Xt[27] is
7179 nonzero. */
7180 IRTemp qc64 = newTemp(Ity_I64);
7181 assign(qc64, binop(Iop_And64,
7182 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
7183 mkU64(1)));
7184 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
7185 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
7186 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
7187 } else {
7188 /* Generate a value which is all zeroes except for bit 27,
7189 which must be zero if QCFLAG is all zeroes and one otherwise. */
7190 IRTemp qcV128 = newTempV128();
7191 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
7192 IRTemp qc64 = newTemp(Ity_I64);
7193 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
7194 unop(Iop_V128to64, mkexpr(qcV128))));
7195 IRExpr* res = binop(Iop_Shl64,
7196 unop(Iop_1Uto64,
7197 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
7198 mkU8(27));
7199 putIReg64orZR(tt, res);
7200 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
7202 return True;
7204 /* ---- Cases for NZCV ----
7205 D51B42 000 Rt MSR nzcv, rT
7206 D53B42 000 Rt MRS rT, nzcv
7207 The only parts of NZCV that actually exist are bits 31:28, which
7208 are the N Z C and V bits themselves. Hence the flags thunk provides
7209 all the state we need.
7211 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
7212 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
7213 Bool toSys = INSN(21,21) == 0;
7214 UInt tt = INSN(4,0);
7215 if (toSys) {
7216 IRTemp t = newTemp(Ity_I64);
7217 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
7218 setFlags_COPY(t);
7219 DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
7220 } else {
7221 IRTemp res = newTemp(Ity_I64);
7222 assign(res, mk_arm64g_calculate_flags_nzcv());
7223 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
7224 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
7226 return True;
7228 /* ---- Cases for DCZID_EL0 ----
7229 Don't support arbitrary reads and writes to this register. Just
7230 return the value 16, which indicates that the DC ZVA instruction
7231 is not permitted, so we don't have to emulate it.
7232 D5 3B 00 111 Rt MRS rT, dczid_el0
7234 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
7235 UInt tt = INSN(4,0);
7236 putIReg64orZR(tt, mkU64(1<<4));
7237 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
7238 return True;
7240 /* ---- Cases for CTR_EL0 ----
7241 We just handle reads, and make up a value from the D and I line
7242 sizes in the VexArchInfo we are given, and patch in the following
7243 fields that the Foundation model gives ("natively"):
7244 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
7245 D5 3B 00 001 Rt MRS rT, ctr_el0
7247 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
7248 UInt tt = INSN(4,0);
7249 /* Need to generate a value from dMinLine_lg2_szB and
7250 iMinLine_lg2_szB. The value in the register is in 32-bit
7251 units, so need to subtract 2 from the values in the
7252 VexArchInfo. We can assume that the values here are valid --
7253 disInstr_ARM64 checks them -- so there's no need to deal with
7254 out-of-range cases. */
7255 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7256 && archinfo->arm64_dMinLine_lg2_szB <= 17
7257 && archinfo->arm64_iMinLine_lg2_szB >= 2
7258 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7259 UInt val
7260 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
7261 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
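/* For example (illustrative only): with 64-byte D and I lines, both
   arm64_dMinLine_lg2_szB and arm64_iMinLine_lg2_szB are 6, so each
   4-bit field holds 6 - 2 = 4 and |val| comes out as 0x8444c004. */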
7262 putIReg64orZR(tt, mkU64(val));
7263 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
7264 return True;
7266 /* ---- Cases for CNTVCT_EL0 ----
7267 This is the generic timer's virtual count register (a timestamp counter). Support reads of it only
7268 by passing through to the host.
7269 D5 3B E0 010 Rt MRS Xt, cntvct_el0
7271 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
7272 UInt tt = INSN(4,0);
7273 IRTemp val = newTemp(Ity_I64);
7274 IRExpr** args = mkIRExprVec_0();
7275 IRDirty* d = unsafeIRDirty_1_N (
7276 val,
7277 0/*regparms*/,
7278 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
7279 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
7280 args
7282 /* execute the dirty call, dumping the result in val. */
7283 stmt( IRStmt_Dirty(d) );
7284 putIReg64orZR(tt, mkexpr(val));
7285 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
7286 return True;
7288 /* ---- Cases for CNTFRQ_EL0 ----
7289 This is always RO at EL0, so it's safe to pass through to the host.
7290 D5 3B E0 000 Rt MRS Xt, cntfrq_el0
7292 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
7293 UInt tt = INSN(4,0);
7294 IRTemp val = newTemp(Ity_I64);
7295 IRExpr** args = mkIRExprVec_0();
7296 IRDirty* d = unsafeIRDirty_1_N (
7297 val,
7298 0/*regparms*/,
7299 "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
7300 &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
7301 args
7303 /* execute the dirty call, dumping the result in val. */
7304 stmt( IRStmt_Dirty(d) );
7305 putIReg64orZR(tt, mkexpr(val));
7306 DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
7307 return True;
7310 /* ------------------ IC_IVAU ------------------ */
7311 /* D5 0B 75 001 Rt ic ivau, rT
7313 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
7314 /* We will always be provided with a valid iMinLine value. */
7315 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
7316 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7317 /* Round the requested address, in rT, down to the start of the
7318 containing block. */
7319 UInt tt = INSN(4,0);
7320 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
7321 IRTemp addr = newTemp(Ity_I64);
7322 assign( addr, binop( Iop_And64,
7323 getIReg64orZR(tt),
7324 mkU64(~(lineszB - 1))) );
7325 /* Set the invalidation range, request exit-and-invalidate, with
7326 continuation at the next instruction. */
7327 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7328 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7329 /* be paranoid ... */
7330 stmt( IRStmt_MBE(Imbe_Fence) );
7331 putPC(mkU64( guest_PC_curr_instr + 4 ));
7332 dres->whatNext = Dis_StopHere;
7333 dres->jk_StopHere = Ijk_InvalICache;
7334 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
7335 return True;
7338 /* ------------------ DC_CVAU ------------------ */
7339 /* D5 0B 7B 001 Rt dc cvau, rT
7340 D5 0B 7E 001 Rt dc civac, rT
7342 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20
7343 || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7E20) {
7344 /* Exactly the same scheme as for IC IVAU, except we observe the
7345 dMinLine size, and request an Ijk_FlushDCache instead of
7346 Ijk_InvalICache. */
7347 /* We will always be provided with a valid dMinLine value. */
7348 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7349 && archinfo->arm64_dMinLine_lg2_szB <= 17);
7350 /* Round the requested address, in rT, down to the start of the
7351 containing block. */
7352 UInt tt = INSN(4,0);
7353 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
7354 IRTemp addr = newTemp(Ity_I64);
7355 assign( addr, binop( Iop_And64,
7356 getIReg64orZR(tt),
7357 mkU64(~(lineszB - 1))) );
7358 /* Set the flush range, request exit-and-flush, with
7359 continuation at the next instruction. */
7360 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7361 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7362 /* be paranoid ... */
7363 stmt( IRStmt_MBE(Imbe_Fence) );
7364 putPC(mkU64( guest_PC_curr_instr + 4 ));
7365 dres->whatNext = Dis_StopHere;
7366 dres->jk_StopHere = Ijk_FlushDCache;
7367 DIP("dc %s, %s\n", (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7E20 ? "civac" : "cvau", nameIReg64orZR(tt));
7368 return True;
7371 /* ------------------ ISB, DMB, DSB ------------------ */
7372 /* 31 21 11 7 6 4
7373 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
7374 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
7375 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
7377 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
7378 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
7379 && INSN(7,7) == 1
7380 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
7381 UInt opc = INSN(6,5);
7382 UInt CRm = INSN(11,8);
7383 vassert(opc <= 2 && CRm <= 15);
7384 stmt(IRStmt_MBE(Imbe_Fence));
7385 const HChar* opNames[3]
7386 = { "dsb", "dmb", "isb" };
7387 const HChar* howNames[16]
7388 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
7389 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
7390 DIP("%s %s\n", opNames[opc], howNames[CRm]);
7391 return True;
7394 /* -------------------- NOP -------------------- */
7395 if (INSN(31,0) == 0xD503201F) {
7396 DIP("nop\n");
7397 return True;
7400 /* -------------------- BRK -------------------- */
7401 /* 31 23 20 4
7402 1101 0100 001 imm16 00000 BRK #imm16
7404 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
7405 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
7406 UInt imm16 = INSN(20,5);
7407 /* Request SIGTRAP and then restart of this insn. */
7408 putPC(mkU64(guest_PC_curr_instr + 0));
7409 dres->whatNext = Dis_StopHere;
7410 dres->jk_StopHere = Ijk_SigTRAP;
7411 DIP("brk #%u\n", imm16);
7412 return True;
7415 /* ------------------- YIELD ------------------- */
7416 /* 31 23 15 7
7417 1101 0101 0000 0011 0010 0000 0011 1111
7419 if (INSN(31,0) == 0xD503203F) {
7420 /* Request yield followed by continuation at the next insn. */
7421 putPC(mkU64(guest_PC_curr_instr + 4));
7422 dres->whatNext = Dis_StopHere;
7423 dres->jk_StopHere = Ijk_Yield;
7424 DIP("yield\n");
7425 return True;
7428 /* -------------------- HINT ------------------- */
7429 /* 31 23 15 11 4 3
7430 1101 0101 0000 0011 0010 imm7 1 1111
7431 Catch otherwise unhandled HINT instructions - any
7432 like YIELD which are explicitly handled should go
7433 above this case.
7435 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
7436 && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
7437 && INSN(15,12) == BITS4(0,0,1,0)
7438 && INSN(4,0) == BITS5(1,1,1,1,1)) {
7439 UInt imm7 = INSN(11,5);
7440 DIP("hint #%u\n", imm7);
7441 return True;
7444 /* ------------------- CLREX ------------------ */
7445 /* 31 23 15 11 7
7446 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
7447 CRm is apparently ignored.
7449 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
7450 UInt mm = INSN(11,8);
7451 /* AFAICS, this simply cancels any (possibly all) reservations made
7452 by preceding LDREX(es). Arrange to hand it through to
7453 the back end. */
7454 if (abiinfo->guest__use_fallback_LLSC) {
7455 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
7456 } else {
7457 stmt( IRStmt_MBE(Imbe_CancelReservation) );
7459 DIP("clrex #%u\n", mm);
7460 return True;
7463 if (sigill_diag) {
7464 vex_printf("ARM64 front end: branch_etc\n");
7466 return False;
7467 # undef INSN
7471 /*------------------------------------------------------------*/
7472 /*--- SIMD and FP instructions: helper functions ---*/
7473 /*------------------------------------------------------------*/
7475 /* Some constructors for interleave/deinterleave expressions. */
7477 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7478 // returns a0 b0
7479 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
7482 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7483 // returns a1 b1
7484 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
7487 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7488 // returns a2 a0 b2 b0
7489 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
7492 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7493 // returns a3 a1 b3 b1
7494 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
7497 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
7498 // returns a1 b1 a0 b0
7499 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
7502 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
7503 // returns a3 b3 a2 b2
7504 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
7507 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7508 // returns a6 a4 a2 a0 b6 b4 b2 b0
7509 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7512 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7513 // returns a7 a5 a3 a1 b7 b5 b3 b1
7514 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7517 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7518 // returns a3 b3 a2 b2 a1 b1 a0 b0
7519 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
7522 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7523 // returns a7 b7 a6 b6 a5 b5 a4 b4
7524 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
7527 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7528 IRTemp bFEDCBA9876543210 ) {
7529 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7530 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
7531 mkexpr(bFEDCBA9876543210));
7534 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7535 IRTemp bFEDCBA9876543210 ) {
7536 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7537 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
7538 mkexpr(bFEDCBA9876543210));
7541 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7542 IRTemp bFEDCBA9876543210 ) {
7543 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7544 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
7545 mkexpr(bFEDCBA9876543210));
7548 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7549 IRTemp bFEDCBA9876543210 ) {
7550 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7551 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
7552 mkexpr(bFEDCBA9876543210));
7555 /* Generate N copies of |bit| in the bottom of a ULong. */
7556 static ULong Replicate ( ULong bit, Int N )
7558 vassert(bit <= 1 && N >= 1 && N < 64);
7559 if (bit == 0) {
7560 return 0;
7561 } else {
7562 /* Careful. This won't work for N == 64. */
7563 return (1ULL << N) - 1;
7567 static ULong Replicate32x2 ( ULong bits32 )
7569 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
7570 return (bits32 << 32) | bits32;
7573 static ULong Replicate16x4 ( ULong bits16 )
7575 vassert(0 == (bits16 & ~0xFFFFULL));
7576 return Replicate32x2((bits16 << 16) | bits16);
7579 static ULong Replicate8x8 ( ULong bits8 )
7581 vassert(0 == (bits8 & ~0xFFULL));
7582 return Replicate16x4((bits8 << 8) | bits8);
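/* For example (illustrative values only): Replicate(1, 3) == 0x7,
   Replicate16x4(0x00AB) == 0x00AB00AB00AB00AB and
   Replicate8x8(0xAB) == 0xABABABABABABABAB. */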
7585 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
7586 |imm8| to either a 32-bit value if N is 32 or a 64-bit value if N
7587 is 64. In the former case, the upper 32 bits of the returned value
7588 are guaranteed to be zero. */
7589 static ULong VFPExpandImm ( ULong imm8, Int N )
7591 vassert(imm8 <= 0xFF);
7592 vassert(N == 32 || N == 64);
7593 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
7594 Int F = N - E - 1;
7595 ULong imm8_6 = (imm8 >> 6) & 1;
7596 /* sign: 1 bit */
7597 /* exp: E bits */
7598 /* frac: F bits */
7599 ULong sign = (imm8 >> 7) & 1;
7600 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
7601 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
7602 vassert(sign < (1ULL << 1));
7603 vassert(exp < (1ULL << E));
7604 vassert(frac < (1ULL << F));
7605 vassert(1 + E + F == N);
7606 ULong res = (sign << (E+F)) | (exp << F) | frac;
7607 return res;
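/* Worked examples (illustrative, computed from the scheme above):
      VFPExpandImm(0x70, 32) == 0x3F800000            -- 1.0f
      VFPExpandImm(0x70, 64) == 0x3FF0000000000000    -- 1.0
   since imm8 = 0x70 encodes sign 0, a biased exponent with every bit
   set except the top one, and a zero fraction. */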
7610 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
7611 This might fail, as indicated by the returned Bool. Page 2530 of
7612 the manual. */
7613 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
7614 UInt op, UInt cmode, UInt imm8 )
7616 vassert(op <= 1);
7617 vassert(cmode <= 15);
7618 vassert(imm8 <= 255);
7620 *res = 0; /* will overwrite iff returning True */
7622 ULong imm64 = 0;
7623 Bool testimm8 = False;
7625 switch (cmode >> 1) {
7626 case 0:
7627 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7628 case 1:
7629 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7630 case 2:
7631 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7632 case 3:
7633 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7634 case 4:
7635 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7636 case 5:
7637 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7638 case 6:
7639 testimm8 = True;
7640 if ((cmode & 1) == 0)
7641 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7642 else
7643 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7644 break;
7645 case 7:
7646 testimm8 = False;
7647 if ((cmode & 1) == 0 && op == 0)
7648 imm64 = Replicate8x8(imm8);
7649 if ((cmode & 1) == 0 && op == 1) {
7650 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7651 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7652 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7653 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7654 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7655 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7656 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7657 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7659 if ((cmode & 1) == 1 && op == 0) {
7660 ULong imm8_7 = (imm8 >> 7) & 1;
7661 ULong imm8_6 = (imm8 >> 6) & 1;
7662 ULong imm8_50 = imm8 & 63;
7663 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7664 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7665 | (Replicate(imm8_6, 5) << (6 + 19))
7666 | (imm8_50 << 19);
7667 imm64 = Replicate32x2(imm32);
7669 if ((cmode & 1) == 1 && op == 1) {
7670 // imm64 = imm8<7>:NOT(imm8<6>)
7671 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7672 ULong imm8_7 = (imm8 >> 7) & 1;
7673 ULong imm8_6 = (imm8 >> 6) & 1;
7674 ULong imm8_50 = imm8 & 63;
7675 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7676 | (Replicate(imm8_6, 8) << 54)
7677 | (imm8_50 << 48);
7679 break;
7680 default:
7681 vassert(0);
7684 if (testimm8 && imm8 == 0)
7685 return False;
7687 *res = imm64;
7688 return True;
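/* Illustrative examples of the expansion (not used by the decoder):
      op=0, cmode=0b0000, imm8=0xAB  ->  0x000000AB000000AB
      op=0, cmode=0b1110, imm8=0xAB  ->  0xABABABABABABABAB
      op=1, cmode=0b1110, imm8=0xA5  ->  0xFF00FF0000FF00FF
   In the last case each bit of imm8 is expanded to a whole byte. */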
7691 /* Help a bit for decoding laneage for vector operations that can be
7692 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7693 and SZ bits, typically for vector floating point. */
7694 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7695 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7696 /*OUT*/const HChar** arrSpec,
7697 Bool bitQ, Bool bitSZ )
7699 vassert(bitQ == True || bitQ == False);
7700 vassert(bitSZ == True || bitSZ == False);
7701 if (bitQ && bitSZ) { // 2x64
7702 if (tyI) *tyI = Ity_I64;
7703 if (tyF) *tyF = Ity_F64;
7704 if (nLanes) *nLanes = 2;
7705 if (zeroUpper) *zeroUpper = False;
7706 if (arrSpec) *arrSpec = "2d";
7707 return True;
7709 if (bitQ && !bitSZ) { // 4x32
7710 if (tyI) *tyI = Ity_I32;
7711 if (tyF) *tyF = Ity_F32;
7712 if (nLanes) *nLanes = 4;
7713 if (zeroUpper) *zeroUpper = False;
7714 if (arrSpec) *arrSpec = "4s";
7715 return True;
7717 if (!bitQ && !bitSZ) { // 2x32
7718 if (tyI) *tyI = Ity_I32;
7719 if (tyF) *tyF = Ity_F32;
7720 if (nLanes) *nLanes = 2;
7721 if (zeroUpper) *zeroUpper = True;
7722 if (arrSpec) *arrSpec = "2s";
7723 return True;
7725 // Else impliedly 1x64, which isn't allowed.
7726 return False;
7729 /* Helper for decoding laneage for shift-style vector operations
7730 that involve an immediate shift amount. */
7731 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7732 UInt immh, UInt immb )
7734 vassert(immh < (1<<4));
7735 vassert(immb < (1<<3));
7736 UInt immhb = (immh << 3) | immb;
7737 if (immh & 8) {
7738 if (shift) *shift = 128 - immhb;
7739 if (szBlg2) *szBlg2 = 3;
7740 return True;
7742 if (immh & 4) {
7743 if (shift) *shift = 64 - immhb;
7744 if (szBlg2) *szBlg2 = 2;
7745 return True;
7747 if (immh & 2) {
7748 if (shift) *shift = 32 - immhb;
7749 if (szBlg2) *szBlg2 = 1;
7750 return True;
7752 if (immh & 1) {
7753 if (shift) *shift = 16 - immhb;
7754 if (szBlg2) *szBlg2 = 0;
7755 return True;
7757 return False;
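/* For example (illustrative): immh = 0b0010, immb = 0b011 gives
   immhb = 19, hence 16-bit lanes (szBlg2 == 1) and a shift amount of
   32 - 19 = 13. */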
7760 /* Generate IR to fold all lanes of the V128 value in 'src' as
7761 characterised by the operator 'op', and return the result in the
7762 bottom bits of a V128, with all other bits set to zero. */
7763 static IRTemp math_FOLDV ( IRTemp src, IROp op )
7765 /* The basic idea is to use repeated applications of Iop_CatEven*
7766 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7767 a complete vector. Then fold all those vectors with 'op' and
7768 zero out all but the least significant lane. */
7769 switch (op) {
7770 case Iop_Min8Sx16: case Iop_Min8Ux16:
7771 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
7772 /* NB: temp naming here is misleading -- the naming is for 8
7773 lanes of 16 bit, whereas what is being operated on is 16
7774 lanes of 8 bits. */
7775 IRTemp x76543210 = src;
7776 IRTemp x76547654 = newTempV128();
7777 IRTemp x32103210 = newTempV128();
7778 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7779 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7780 IRTemp x76767676 = newTempV128();
7781 IRTemp x54545454 = newTempV128();
7782 IRTemp x32323232 = newTempV128();
7783 IRTemp x10101010 = newTempV128();
7784 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7785 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7786 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7787 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7788 IRTemp x77777777 = newTempV128();
7789 IRTemp x66666666 = newTempV128();
7790 IRTemp x55555555 = newTempV128();
7791 IRTemp x44444444 = newTempV128();
7792 IRTemp x33333333 = newTempV128();
7793 IRTemp x22222222 = newTempV128();
7794 IRTemp x11111111 = newTempV128();
7795 IRTemp x00000000 = newTempV128();
7796 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7797 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7798 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7799 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7800 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7801 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7802 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7803 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7804 /* Naming not misleading after here. */
7805 IRTemp xAllF = newTempV128();
7806 IRTemp xAllE = newTempV128();
7807 IRTemp xAllD = newTempV128();
7808 IRTemp xAllC = newTempV128();
7809 IRTemp xAllB = newTempV128();
7810 IRTemp xAllA = newTempV128();
7811 IRTemp xAll9 = newTempV128();
7812 IRTemp xAll8 = newTempV128();
7813 IRTemp xAll7 = newTempV128();
7814 IRTemp xAll6 = newTempV128();
7815 IRTemp xAll5 = newTempV128();
7816 IRTemp xAll4 = newTempV128();
7817 IRTemp xAll3 = newTempV128();
7818 IRTemp xAll2 = newTempV128();
7819 IRTemp xAll1 = newTempV128();
7820 IRTemp xAll0 = newTempV128();
7821 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7822 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7823 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7824 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7825 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7826 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7827 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7828 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7829 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7830 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7831 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7832 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7833 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7834 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7835 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7836 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
7837 IRTemp maxFE = newTempV128();
7838 IRTemp maxDC = newTempV128();
7839 IRTemp maxBA = newTempV128();
7840 IRTemp max98 = newTempV128();
7841 IRTemp max76 = newTempV128();
7842 IRTemp max54 = newTempV128();
7843 IRTemp max32 = newTempV128();
7844 IRTemp max10 = newTempV128();
7845 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7846 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7847 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7848 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7849 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7850 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7851 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7852 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
7853 IRTemp maxFEDC = newTempV128();
7854 IRTemp maxBA98 = newTempV128();
7855 IRTemp max7654 = newTempV128();
7856 IRTemp max3210 = newTempV128();
7857 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7858 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7859 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7860 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7861 IRTemp maxFEDCBA98 = newTempV128();
7862 IRTemp max76543210 = newTempV128();
7863 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7864 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7865 IRTemp maxAllLanes = newTempV128();
7866 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7867 mkexpr(max76543210)));
7868 IRTemp res = newTempV128();
7869 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7870 return res;
7872 case Iop_Min16Sx8: case Iop_Min16Ux8:
7873 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
7874 IRTemp x76543210 = src;
7875 IRTemp x76547654 = newTempV128();
7876 IRTemp x32103210 = newTempV128();
7877 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7878 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7879 IRTemp x76767676 = newTempV128();
7880 IRTemp x54545454 = newTempV128();
7881 IRTemp x32323232 = newTempV128();
7882 IRTemp x10101010 = newTempV128();
7883 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7884 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7885 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7886 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7887 IRTemp x77777777 = newTempV128();
7888 IRTemp x66666666 = newTempV128();
7889 IRTemp x55555555 = newTempV128();
7890 IRTemp x44444444 = newTempV128();
7891 IRTemp x33333333 = newTempV128();
7892 IRTemp x22222222 = newTempV128();
7893 IRTemp x11111111 = newTempV128();
7894 IRTemp x00000000 = newTempV128();
7895 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7896 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7897 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7898 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7899 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7900 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7901 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7902 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7903 IRTemp max76 = newTempV128();
7904 IRTemp max54 = newTempV128();
7905 IRTemp max32 = newTempV128();
7906 IRTemp max10 = newTempV128();
7907 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7908 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7909 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7910 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
7911 IRTemp max7654 = newTempV128();
7912 IRTemp max3210 = newTempV128();
7913 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7914 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7915 IRTemp max76543210 = newTempV128();
7916 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7917 IRTemp res = newTempV128();
7918 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7919 return res;
7921 case Iop_Max32Fx4: case Iop_Min32Fx4:
7922 case Iop_Min32Sx4: case Iop_Min32Ux4:
7923 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
7924 IRTemp x3210 = src;
7925 IRTemp x3232 = newTempV128();
7926 IRTemp x1010 = newTempV128();
7927 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7928 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
7929 IRTemp x3333 = newTempV128();
7930 IRTemp x2222 = newTempV128();
7931 IRTemp x1111 = newTempV128();
7932 IRTemp x0000 = newTempV128();
7933 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7934 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7935 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7936 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
7937 IRTemp max32 = newTempV128();
7938 IRTemp max10 = newTempV128();
7939 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7940 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
7941 IRTemp max3210 = newTempV128();
7942 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7943 IRTemp res = newTempV128();
7944 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7945 return res;
7947 case Iop_Add64x2: {
7948 IRTemp x10 = src;
7949 IRTemp x00 = newTempV128();
7950 IRTemp x11 = newTempV128();
7951 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7952 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
7953 IRTemp max10 = newTempV128();
7954 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
7955 IRTemp res = newTempV128();
7956 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7957 return res;
7959 default:
7960 vassert(0);
7965 /* Generate IR for TBL and TBX. This deals with the 128 bit case
7966 only. */
7967 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7968 IRTemp oor_values )
7970 vassert(len >= 0 && len <= 3);
7972 /* Generate some useful constants as concisely as possible. */
7973 IRTemp half15 = newTemp(Ity_I64);
7974 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7975 IRTemp half16 = newTemp(Ity_I64);
7976 assign(half16, mkU64(0x1010101010101010ULL));
7978 /* A zero vector */
7979 IRTemp allZero = newTempV128();
7980 assign(allZero, mkV128(0x0000));
7981 /* A vector containing 15 in each 8-bit lane */
7982 IRTemp all15 = newTempV128();
7983 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7984 /* A vector containing 16 in each 8-bit lane */
7985 IRTemp all16 = newTempV128();
7986 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7987 /* A vector containing 32 in each 8-bit lane */
7988 IRTemp all32 = newTempV128();
7989 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7990 /* A vector containing 48 in each 8-bit lane */
7991 IRTemp all48 = newTempV128();
7992 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7993 /* A vector containing 64 in each 8-bit lane */
7994 IRTemp all64 = newTempV128();
7995 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7997 /* Group the 16/32/48/64 vectors so as to be indexable. */
7998 IRTemp allXX[4] = { all16, all32, all48, all64 };
8000 /* Compute the result for each table vector, with zeroes in places
8001 where the index values are out of range, and OR them into the
8002 running vector. */
8003 IRTemp running_result = newTempV128();
8004 assign(running_result, mkV128(0));
8006 UInt tabent;
8007 for (tabent = 0; tabent <= len; tabent++) {
8008 vassert(tabent >= 0 && tabent < 4);
8009 IRTemp bias = newTempV128();
8010 assign(bias,
8011 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
8012 IRTemp biased_indices = newTempV128();
8013 assign(biased_indices,
8014 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
8015 IRTemp valid_mask = newTempV128();
8016 assign(valid_mask,
8017 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
8018 IRTemp safe_biased_indices = newTempV128();
8019 assign(safe_biased_indices,
8020 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
8021 IRTemp results_or_junk = newTempV128();
8022 assign(results_or_junk,
8023 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
8024 mkexpr(safe_biased_indices)));
8025 IRTemp results_or_zero = newTempV128();
8026 assign(results_or_zero,
8027 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
8028 /* And OR that into the running result. */
8029 IRTemp tmp = newTempV128();
8030 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
8031 mkexpr(running_result)));
8032 running_result = tmp;
8035 /* So now running_result holds the overall result where the indices
8036 are in range, and zero in out-of-range lanes. Now we need to
8037 compute an overall validity mask and use this to copy in the
8038 lanes in the oor_values for out of range indices. This is
8039 unnecessary for TBL but will get folded out by iropt, so we lean
8040 on that and generate the same code for TBL and TBX here. */
8041 IRTemp overall_valid_mask = newTempV128();
8042 assign(overall_valid_mask,
8043 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
8044 IRTemp result = newTempV128();
8045 assign(result,
8046 binop(Iop_OrV128,
8047 mkexpr(running_result),
8048 binop(Iop_AndV128,
8049 mkexpr(oor_values),
8050 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
8051 return result;
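/* A worked example (illustrative): with a two-register table (len == 1),
   a source byte of 0x13 is out of range on the first pass (0x13 - 0 is
   not < 16) but on the second pass is biased down to 0x13 - 16 = 3 and
   so selects byte 3 of tab[1]. A source byte of 0x2A (>= 32) fails both
   passes and that lane ends up as the corresponding lane of
   |oor_values| -- which the caller would supply as zero for TBL and as
   the old destination for TBX. */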
8055 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
8056 an op which takes two I64s and produces a V128. That is, a widening
8057 operator. Generate IR which applies |opI64x2toV128| to either the
8058 lower (if |is2| is False) or upper (if |is2| is True) halves of
8059 |argL| and |argR|, and return the value in a new IRTemp.
8061 static
8062 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
8063 IRExpr* argL, IRExpr* argR )
8065 IRTemp res = newTempV128();
8066 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
8067 assign(res, binop(opI64x2toV128, unop(slice, argL),
8068 unop(slice, argR)));
8069 return res;
8073 /* Generate signed/unsigned absolute difference vector IR. */
8074 static
8075 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
8077 vassert(size <= 3);
8078 IRTemp argL = newTempV128();
8079 IRTemp argR = newTempV128();
8080 IRTemp msk = newTempV128();
8081 IRTemp res = newTempV128();
8082 assign(argL, argLE);
8083 assign(argR, argRE);
8084 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
8085 mkexpr(argL), mkexpr(argR)));
8086 assign(res,
8087 binop(Iop_OrV128,
8088 binop(Iop_AndV128,
8089 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
8090 mkexpr(msk)),
8091 binop(Iop_AndV128,
8092 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
8093 unop(Iop_NotV128, mkexpr(msk)))));
8094 return res;
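/* For example (illustrative), with signed 8-bit lanes: argL = 3 and
   argR = 5 give msk = 0 (3 > 5 is false), so the result lane is
   argR - argL = 2; argL = -3 and argR = 5 likewise give 8. */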
8098 /* Generate IR that takes a V128 and sign- or zero-widens
8099 either the lower or upper set of lanes to twice-as-wide,
8100 resulting in a new V128 value. */
8101 static
8102 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
8103 UInt sizeNarrow, IRExpr* srcE )
8105 IRTemp src = newTempV128();
8106 IRTemp res = newTempV128();
8107 assign(src, srcE);
8108 switch (sizeNarrow) {
8109 case X10:
8110 assign(res,
8111 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
8112 binop(fromUpperHalf ? Iop_InterleaveHI32x4
8113 : Iop_InterleaveLO32x4,
8114 mkexpr(src),
8115 mkexpr(src)),
8116 mkU8(32)));
8117 break;
8118 case X01:
8119 assign(res,
8120 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
8121 binop(fromUpperHalf ? Iop_InterleaveHI16x8
8122 : Iop_InterleaveLO16x8,
8123 mkexpr(src),
8124 mkexpr(src)),
8125 mkU8(16)));
8126 break;
8127 case X00:
8128 assign(res,
8129 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
8130 binop(fromUpperHalf ? Iop_InterleaveHI8x16
8131 : Iop_InterleaveLO8x16,
8132 mkexpr(src),
8133 mkexpr(src)),
8134 mkU8(8)));
8135 break;
8136 default:
8137 vassert(0);
8139 return res;
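/* For example (illustrative): with sizeNarrow == X10, zWiden == False
   and fromUpperHalf == False, a lower 32-bit lane holding 0xFFFFFFFF is
   interleaved with itself and then arithmetically shifted right by 32,
   giving the sign-extended 64-bit lane 0xFFFFFFFFFFFFFFFF; with
   zWiden == True the same lane becomes 0x00000000FFFFFFFF. */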
8143 /* Generate IR that takes a V128 and sign- or zero-widens
8144 either the even or odd lanes to twice-as-wide,
8145 resulting in a new V128 value. */
8146 static
8147 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
8148 UInt sizeNarrow, IRExpr* srcE )
8150 IRTemp src = newTempV128();
8151 IRTemp res = newTempV128();
8152 IROp opSAR = mkVecSARN(sizeNarrow+1);
8153 IROp opSHR = mkVecSHRN(sizeNarrow+1);
8154 IROp opSHL = mkVecSHLN(sizeNarrow+1);
8155 IROp opSxR = zWiden ? opSHR : opSAR;
8156 UInt amt = 0;
8157 switch (sizeNarrow) {
8158 case X10: amt = 32; break;
8159 case X01: amt = 16; break;
8160 case X00: amt = 8; break;
8161 default: vassert(0);
8163 assign(src, srcE);
8164 if (fromOdd) {
8165 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
8166 } else {
8167 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
8168 mkU8(amt)));
8170 return res;
8174 /* Generate IR that takes two V128s and narrows (takes lower half)
8175 of each lane, producing a single V128 value. */
8176 static
8177 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
8179 IRTemp res = newTempV128();
8180 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
8181 mkexpr(argHi), mkexpr(argLo)));
8182 return res;
8186 /* Return a temp which holds the vector dup of the lane of width
8187 (1 << size) obtained from src[laneNo]. */
8188 static
8189 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
8191 vassert(size <= 3);
8192 /* Normalise |laneNo| so it is of the form
8193 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
8194 This puts the bits we want to inspect at constant offsets
8195 regardless of the value of |size|.
8197 UInt ix = laneNo << size;
8198 vassert(ix <= 15);
8199 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
8200 switch (size) {
8201 case 0: /* B */
8202 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
8203 /* fallthrough */
8204 case 1: /* H */
8205 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
8206 /* fallthrough */
8207 case 2: /* S */
8208 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
8209 /* fallthrough */
8210 case 3: /* D */
8211 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
8212 break;
8213 default:
8214 vassert(0);
8216 IRTemp res = newTempV128();
8217 assign(res, src);
8218 Int i;
8219 for (i = 3; i >= 0; i--) {
8220 if (ops[i] == Iop_INVALID)
8221 break;
8222 IRTemp tmp = newTempV128();
8223 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
8224 res = tmp;
8226 return res;
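/* For example (illustrative): size == 2 (.s) and laneNo == 3 give
   ix == 12, so the loop applies Iop_InterleaveHI64x2 (yielding 32-bit
   lanes s3 s2 s3 s2) and then Iop_CatOddLanes32x4 (yielding
   s3 s3 s3 s3), i.e. lane 3 duplicated into every 32-bit lane. */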
8230 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
8231 selector encoded as shown below. Return a new V128 holding the
8232 selected lane from |srcV| dup'd out to V128, and also return the
8233 lane number, log2 of the lane size in bytes, and width-character via
8234 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
8235 is an invalid selector, in which case return
8236 IRTemp_INVALID, 0, 0 and '?' respectively.
8238 imm5 = xxxx1 signifies .b[xxxx]
8239 = xxx10 .h[xxx]
8240 = xx100 .s[xx]
8241 = x1000 .d[x]
8242 otherwise invalid
8244 static
8245 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
8246 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
8247 IRExpr* srcV, UInt imm5 )
8249 *laneNo = 0;
8250 *laneSzLg2 = 0;
8251 *laneCh = '?';
8253 if (imm5 & 1) {
8254 *laneNo = (imm5 >> 1) & 15;
8255 *laneSzLg2 = 0;
8256 *laneCh = 'b';
8258 else if (imm5 & 2) {
8259 *laneNo = (imm5 >> 2) & 7;
8260 *laneSzLg2 = 1;
8261 *laneCh = 'h';
8263 else if (imm5 & 4) {
8264 *laneNo = (imm5 >> 3) & 3;
8265 *laneSzLg2 = 2;
8266 *laneCh = 's';
8268 else if (imm5 & 8) {
8269 *laneNo = (imm5 >> 4) & 1;
8270 *laneSzLg2 = 3;
8271 *laneCh = 'd';
8273 else {
8274 /* invalid */
8275 return IRTemp_INVALID;
8278 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
8282 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
8283 static
8284 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
8286 IRType ty = Ity_INVALID;
8287 IRTemp rcS = IRTemp_INVALID;
8288 switch (size) {
8289 case X01:
8290 vassert(imm <= 0xFFFFULL);
8291 ty = Ity_I16;
8292 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
8293 break;
8294 case X10:
8295 vassert(imm <= 0xFFFFFFFFULL);
8296 ty = Ity_I32;
8297 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
8298 break;
8299 case X11:
8300 ty = Ity_I64;
8301 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
8302 default:
8303 vassert(0);
8305 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
8306 return rcV;
8310 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
8311 and the upper can contain any value -- it is ignored. If |is2| is False,
8312 generate IR to put |new64| in the lower half of vector reg |dd| and zero
8313 the upper half. If |is2| is True, generate IR to put |new64| in the upper
8314 half of vector reg |dd| and leave the lower half unchanged. This
8315 simulates the behaviour of the "foo/foo2" instructions in which the
8316 destination is half the width of sources, for example addhn/addhn2.
8318 static
8319 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
8321 if (is2) {
8322 /* Keep the lower 64 bits of the old contents of Vdd, and OR the
8323 interesting (lower) half of |new64| in as the new upper half. */
8324 IRTemp t_zero_oldLO = newTempV128();
8325 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
8326 IRTemp t_newHI_zero = newTempV128();
8327 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
8328 mkV128(0x0000)));
8329 IRTemp res = newTempV128();
8330 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
8331 mkexpr(t_newHI_zero)));
8332 putQReg128(dd, mkexpr(res));
8333 } else {
8334 /* This is simple. */
8335 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
8340 /* Compute vector SQABS at lane size |size| for |srcE|, returning
8341 the q result in |*qabs| and the normal result in |*nabs|. */
8342 static
8343 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
8344 IRExpr* srcE, UInt size )
8346 IRTemp src, mask, maskn, nsub, qsub;
8347 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
8348 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
8349 assign(src, srcE);
8350 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
8351 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
8352 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8353 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8354 assign(*nabs, binop(Iop_OrV128,
8355 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
8356 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8357 assign(*qabs, binop(Iop_OrV128,
8358 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
8359 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8363 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
8364 the q result in |*qneg| and the normal result in |*nneg|. */
8365 static
8366 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
8367 IRExpr* srcE, UInt size )
8369 IRTemp src = IRTemp_INVALID;
8370 newTempsV128_3(&src, nneg, qneg);
8371 assign(src, srcE);
8372 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8373 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8377 /* Zero all except the least significant lane of |srcE|, where |size|
8378 indicates the lane size in the usual way. */
8379 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
8381 vassert(size < 4);
8382 IRTemp t = newTempV128();
8383 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
8384 return t;
8388 /* Generate IR to compute vector widening MULL from either the lower
8389 (is2==False) or upper (is2==True) halves of vecN and vecM. The
8390 widening multiplies are unsigned when isU==True and signed when
8391 isU==False. |size| is the narrow lane size indication. Optionally,
8392 the product may be added to or subtracted from vecD, at the wide lane
8393 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
8394 is 'm' (only multiply) then the accumulate part does not happen, and
8395 |vecD| is expected to == IRTemp_INVALID.
8397 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
8398 are allowed. The result is returned in a new IRTemp, which is
8399 handed back to the caller via *res.
8400 static
8401 void math_MULL_ACC ( /*OUT*/IRTemp* res,
8402 Bool is2, Bool isU, UInt size, HChar mas,
8403 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8405 vassert(res && *res == IRTemp_INVALID);
8406 vassert(size <= 2);
8407 vassert(mas == 'm' || mas == 'a' || mas == 's');
8408 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
8409 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
8410 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
8411 : (mas == 's' ? mkVecSUB(size+1)
8412 : Iop_INVALID);
8413 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
8414 mkexpr(vecN), mkexpr(vecM));
8415 *res = newTempV128();
8416 assign(*res, mas == 'm' ? mkexpr(mul)
8417 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
8421 /* Same as math_MULL_ACC, except the multiply is signed widening,
8422 the multiplied value is then doubled, before being added to or
8423 subtracted from the accumulated value. And everything is
8424 saturated. In all cases, saturation residuals are returned
8425 via (sat1q, sat1n), and in the accumulate cases,
8426 via (sat2q, sat2n) too. All results are returned in new temporaries.
8427 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8428 so the caller can tell this has happened. */
8429 static
8430 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
8431 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8432 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
8433 Bool is2, UInt size, HChar mas,
8434 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8436 vassert(size <= 2);
8437 vassert(mas == 'm' || mas == 'a' || mas == 's');
8438 /* Compute
8439 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
8440 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
8441 IOW take either the low or high halves of vecN and vecM, signed widen,
8442 multiply, double that, and signedly saturate. Also compute the same
8443 but without saturation.
8445 vassert(sat2q && *sat2q == IRTemp_INVALID);
8446 vassert(sat2n && *sat2n == IRTemp_INVALID);
8447 newTempsV128_3(sat1q, sat1n, res);
8448 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
8449 mkexpr(vecN), mkexpr(vecM));
8450 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
8451 mkexpr(vecN), mkexpr(vecM));
8452 assign(*sat1q, mkexpr(tq));
8453 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
8455 /* If there is no accumulation, the final result is sat1q,
8456 and there's no assignment to sat2q or sat2n. */
8457 if (mas == 'm') {
8458 assign(*res, mkexpr(*sat1q));
8459 return;
8462 /* Compute
8463 sat2q = vecD +sq/-sq sat1q
8464 sat2n = vecD +/- sat1n
8465 result = sat2q
8467 newTempsV128_2(sat2q, sat2n);
8468 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
8469 mkexpr(vecD), mkexpr(*sat1q)));
8470 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
8471 mkexpr(vecD), mkexpr(*sat1n)));
8472 assign(*res, mkexpr(*sat2q));
8476 /* Generate IR for widening signed vector multiplies. The operands
8477 have their lane width signedly widened, and they are then multiplied
8478 at the wider width, returning results in two new IRTemps. */
8479 static
8480 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
8481 UInt sizeNarrow, IRTemp argL, IRTemp argR )
8483 vassert(sizeNarrow <= 2);
8484 newTempsV128_2(resHI, resLO);
8485 IRTemp argLhi = newTemp(Ity_I64);
8486 IRTemp argLlo = newTemp(Ity_I64);
8487 IRTemp argRhi = newTemp(Ity_I64);
8488 IRTemp argRlo = newTemp(Ity_I64);
8489 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
8490 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
8491 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
8492 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
8493 IROp opMulls = mkVecMULLS(sizeNarrow);
8494 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
8495 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
8499 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8500 double that, possibly add a rounding constant (R variants), and take
8501 the high half. */
8502 static
8503 void math_SQDMULH ( /*OUT*/IRTemp* res,
8504 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8505 Bool isR, UInt size, IRTemp vN, IRTemp vM )
8507 vassert(size == X01 || size == X10); /* s or h only */
8509 newTempsV128_3(res, sat1q, sat1n);
8511 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
8512 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
8514 IRTemp addWide = mkVecADD(size+1);
8516 if (isR) {
8517 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8519 Int rcShift = size == X01 ? 15 : 31;
8520 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
8521 assign(*sat1n,
8522 binop(mkVecCATODDLANES(size),
8523 binop(addWide,
8524 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8525 mkexpr(roundConst)),
8526 binop(addWide,
8527 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
8528 mkexpr(roundConst))));
8529 } else {
8530 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8532 assign(*sat1n,
8533 binop(mkVecCATODDLANES(size),
8534 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8535 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
8538 assign(*res, mkexpr(*sat1q));
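/* Worked example (illustrative), for 16-bit lanes: 0x4000 * 0x4000
   widened is 0x10000000, doubled is 0x20000000, and the high half is
   0x2000 -- i.e. 0.5 * 0.5 = 0.25 in Q15. The classic saturating case
   is 0x8000 * 0x8000, whose doubled product 0x80000000 would give a
   high half of 0x8000 but is saturated to 0x7FFF by the Q variant. */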
8541 /* Generate IR for SQRDMLAH and SQRDMLSH: signedly wideningly multiply,
8542 double, add a rounding constant, take the high half and accumulate. */
8543 static
8544 void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd,
8545 UInt size, IRTemp vD, IRTemp vN, IRTemp vM )
8547 vassert(size == X01 || size == X10); /* s or h only */
8549 /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */
8551 IRTemp mul, mul_nosat, dummy;
8552 mul = mul_nosat = dummy = IRTemp_INVALID;
8553 math_SQDMULH(&mul, &dummy, &mul_nosat, True/*R*/, size, vN, vM);
8555 IROp op = isAdd ? mkVecADD(size) : mkVecSUB(size);
8556 IROp qop = isAdd ? mkVecQADDS(size) : mkVecQSUBS(size);
8557 newTempsV128_2(res, res_nosat);
8558 assign(*res, binop(qop, mkexpr(vD), mkexpr(mul)));
8559 assign(*res_nosat, binop(op, mkexpr(vD), mkexpr(mul_nosat)));
8563 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
8564 a new temp in *res, and the Q difference pair in new temps in
8565 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
8566 three operations it is. */
8567 static
8568 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
8569 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
8570 IRTemp src, UInt size, UInt shift, const HChar* nm )
8572 vassert(size <= 3);
8573 UInt laneBits = 8 << size;
8574 vassert(shift < laneBits);
8575 newTempsV128_3(res, qDiff1, qDiff2);
8576 IRTemp z128 = newTempV128();
8577 assign(z128, mkV128(0x0000));
8579 /* UQSHL */
8580 if (vex_streq(nm, "uqshl")) {
8581 IROp qop = mkVecQSHLNSATUU(size);
8582 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8583 if (shift == 0) {
8584 /* No shift means no saturation. */
8585 assign(*qDiff1, mkexpr(z128));
8586 assign(*qDiff2, mkexpr(z128));
8587 } else {
8588 /* Saturation has occurred if any of the shifted-out bits are
8589 nonzero. We get the shifted-out bits by right-shifting the
8590 original value. */
8591 UInt rshift = laneBits - shift;
8592 vassert(rshift >= 1 && rshift < laneBits);
8593 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8594 assign(*qDiff2, mkexpr(z128));
8596 return;
8599 /* SQSHL */
8600 if (vex_streq(nm, "sqshl")) {
8601 IROp qop = mkVecQSHLNSATSS(size);
8602 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8603 if (shift == 0) {
8604 /* No shift means no saturation. */
8605 assign(*qDiff1, mkexpr(z128));
8606 assign(*qDiff2, mkexpr(z128));
8607 } else {
8608 /* Saturation has occurred if any of the shifted-out bits are
8609 different from the top bit of the original value. */
8610 UInt rshift = laneBits - 1 - shift;
8611 vassert(rshift >= 0 && rshift < laneBits-1);
8612 /* qDiff1 is the shifted out bits, and the top bit of the original
8613 value, preceded by zeroes. */
8614 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8615 /* qDiff2 is the top bit of the original value, cloned the
8616 correct number of times. */
8617 assign(*qDiff2, binop(mkVecSHRN(size),
8618 binop(mkVecSARN(size), mkexpr(src),
8619 mkU8(laneBits-1)),
8620 mkU8(rshift)));
8621 /* This also succeeds in comparing the top bit of the original
8622 value to itself, which is a bit stupid, but not wrong. */
8624 return;
8627 /* SQSHLU */
8628 if (vex_streq(nm, "sqshlu")) {
8629 IROp qop = mkVecQSHLNSATSU(size);
8630 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8631 if (shift == 0) {
8632 /* If there's no shift, saturation depends on the top bit
8633 of the source. */
8634 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8635 assign(*qDiff2, mkexpr(z128));
8636 } else {
8637 /* Saturation has occurred if any of the shifted-out bits are
8638 nonzero. We get the shifted-out bits by right-shifting the
8639 original value. */
8640 UInt rshift = laneBits - shift;
8641 vassert(rshift >= 1 && rshift < laneBits);
8642 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8643 assign(*qDiff2, mkexpr(z128));
8645 return;
8648 vassert(0);
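/* For example (illustrative): uqshl with 8-bit lanes and shift == 3 on
   a source lane of 0x2C would produce 0x160, which does not fit, so the
   Q op saturates it to 0xFF; the shifted-out bits 0x2C >> 5 == 1 make
   qDiff1 nonzero while qDiff2 stays zero, so a subsequent QCFLAG update
   sees a difference and sets the sticky saturation flag. */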
8652 /* Generate IR to do SRHADD and URHADD. */
8653 static
8654 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8656 /* Generate this:
8657 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8659 vassert(size <= 3);
8660 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8661 IROp opADD = mkVecADD(size);
8662 /* The only tricky bit is to generate the correct vector 1 constant. */
8663 const ULong ones64[4]
8664 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8665 0x0000000100000001ULL, 0x0000000000000001ULL };
8666 IRTemp imm64 = newTemp(Ity_I64);
8667 assign(imm64, mkU64(ones64[size]));
8668 IRTemp vecOne = newTempV128();
8669 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8670 IRTemp scaOne = newTemp(Ity_I8);
8671 assign(scaOne, mkU8(1));
8672 IRTemp res = newTempV128();
8673 assign(res,
8674 binop(opADD,
8675 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8676 binop(opADD,
8677 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8678 binop(opSHR,
8679 binop(opADD,
8680 binop(opADD,
8681 binop(Iop_AndV128, mkexpr(aa),
8682 mkexpr(vecOne)),
8683 binop(Iop_AndV128, mkexpr(bb),
8684 mkexpr(vecOne))
8686 mkexpr(vecOne)
8688 mkexpr(scaOne)
8693 return res;
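/* For example (illustrative), with unsigned 8-bit lanes: aa = 5 and
   bb = 6 give 2 + 3 + ((1 + 0 + 1) >> 1) = 6, the same as
   (5 + 6 + 1) >> 1; and aa = bb = 0xFF gives 0xFF without the
   intermediate sum ever overflowing the lane. */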
8697 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
8698 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8699 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8700 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8701 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8702 unmodified. The presence of |opZHI| means this function can be used to
8703 generate QCFLAG update code for both scalar and vector SIMD operations.
8705 static
8706 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8708 IRTemp diff = newTempV128();
8709 IRTemp oldQCFLAG = newTempV128();
8710 IRTemp newQCFLAG = newTempV128();
8711 if (opZHI == Iop_INVALID) {
8712 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8713 } else {
8714 vassert(opZHI == Iop_ZeroHI64ofV128
8715 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8716 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8718 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8719 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8720 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8724 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8725 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8726 operations. */
8727 static
8728 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8730 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8734 /* Generate IR to rearrange two vector values in a way which is useful
8735 for doing S/D add-pair etc operations. There are 3 cases:
8737 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8739 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8741 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8743 The cases are distinguished as follows:
8744 isD == True, bitQ == 1 => 2d
8745 isD == False, bitQ == 1 => 4s
8746 isD == False, bitQ == 0 => 2s
8748 static
8749 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8750 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8751 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8754 vassert(rearrL && *rearrL == IRTemp_INVALID);
8755 vassert(rearrR && *rearrR == IRTemp_INVALID);
8756 *rearrL = newTempV128();
8757 *rearrR = newTempV128();
8758 if (isD) {
8759 // 2d case
8760 vassert(bitQ == 1);
8761 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8762 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8764 else if (!isD && bitQ == 1) {
8765 // 4s case
8766 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8767 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8768 } else {
8769 // 2s case
8770 vassert(!isD && bitQ == 0);
8771 IRTemp m1n1m0n0 = newTempV128();
8772 IRTemp m0n0m1n1 = newTempV128();
8773 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8774 mkexpr(vecM), mkexpr(vecN)));
8775 assign(m0n0m1n1, triop(Iop_SliceV128,
8776 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8777 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8778 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8783 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8784 static Double two_to_the_minus ( Int n )
8786 if (n == 1) return 0.5;
8787 vassert(n >= 2 && n <= 64);
8788 Int half = n / 2;
8789 return two_to_the_minus(half) * two_to_the_minus(n - half);
8793 /* Returns 2.0 ^ n for n in 1 .. 64 */
8794 static Double two_to_the_plus ( Int n )
8796 if (n == 1) return 2.0;
8797 vassert(n >= 2 && n <= 64);
8798 Int half = n / 2;
8799 return two_to_the_plus(half) * two_to_the_plus(n - half);
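/* Editorial note (not part of the original file): both helpers split the
   exponent recursively, so every intermediate value is itself a power of
   two and hence exactly representable in an IEEE754 double over the
   supported range n = 1 .. 64.  For example:

      two_to_the_plus(7)  = two_to_the_plus(3) * two_to_the_plus(4)
                          = 8.0 * 16.0 = 128.0
      two_to_the_minus(7) = 0.125 * 0.0625 = 0.0078125   (= 2^-7)
*/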
8803 /*------------------------------------------------------------*/
8804 /*--- SIMD and FP instructions ---*/
8805 /*------------------------------------------------------------*/
8807 static
8808 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8810 /* 31 29 23 21 20 15 14 10 9 4
8811 0 q 101110 op2 0 m 0 imm4 0 n d
8812 Decode fields: op2
8814 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8815 if (INSN(31,31) != 0
8816 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8817 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8818 return False;
8820 UInt bitQ = INSN(30,30);
8821 UInt op2 = INSN(23,22);
8822 UInt mm = INSN(20,16);
8823 UInt imm4 = INSN(14,11);
8824 UInt nn = INSN(9,5);
8825 UInt dd = INSN(4,0);
8827 if (op2 == BITS2(0,0)) {
8828 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
8829 IRTemp sHi = newTempV128();
8830 IRTemp sLo = newTempV128();
8831 IRTemp res = newTempV128();
8832 assign(sHi, getQReg128(mm));
8833 assign(sLo, getQReg128(nn));
8834 if (bitQ == 1) {
8835 if (imm4 == 0) {
8836 assign(res, mkexpr(sLo));
8837 } else {
8838 vassert(imm4 >= 1 && imm4 <= 15);
8839 assign(res, triop(Iop_SliceV128,
8840 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
8842 putQReg128(dd, mkexpr(res));
8843 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8844 } else {
8845 if (imm4 >= 8) return False;
8846 if (imm4 == 0) {
8847 assign(res, mkexpr(sLo));
8848 } else {
8849 vassert(imm4 >= 1 && imm4 <= 7);
8850 IRTemp hi64lo64 = newTempV128();
8851 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8852 mkexpr(sHi), mkexpr(sLo)));
8853 assign(res, triop(Iop_SliceV128,
8854 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
8856 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8857 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8859 return True;
8862 return False;
8863 # undef INSN
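/* Editorial note (not part of the original file): the net effect of the
   EXT decode above is a byte-wise extract from the concatenation Vm:Vn,
   with Vn supplying the low bytes.  For the 16b form each result byte is

      Vd.byte[i] = (i + imm4 <= 15) ? Vn.byte[i + imm4]
                                    : Vm.byte[i + imm4 - 16]

   so e.g. "ext v0.16b, v1.16b, v2.16b, #3" puts bytes 3..15 of v1 in the
   low 13 bytes of v0 and bytes 0..2 of v2 in the top 3 bytes.  The 8b
   form does the same on the two low 64-bit halves and zeroes the upper
   half of the destination. */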
8867 static
8868 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8870 /* 31 29 23 21 20 15 14 12 11 9 4
8871 0 q 001110 op2 0 m 0 len op 00 n d
8872 Decode fields: op2,len,op
8874 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8875 if (INSN(31,31) != 0
8876 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8877 || INSN(21,21) != 0
8878 || INSN(15,15) != 0
8879 || INSN(11,10) != BITS2(0,0)) {
8880 return False;
8882 UInt bitQ = INSN(30,30);
8883 UInt op2 = INSN(23,22);
8884 UInt mm = INSN(20,16);
8885 UInt len = INSN(14,13);
8886 UInt bitOP = INSN(12,12);
8887 UInt nn = INSN(9,5);
8888 UInt dd = INSN(4,0);
8890 if (op2 == X00) {
8891 /* -------- 00,xx,0 TBL, xx register table -------- */
8892 /* -------- 00,xx,1 TBX, xx register table -------- */
8893 /* 31 28 20 15 14 12 9 4
8894 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8895 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8896 where Ta = 16b(q=1) or 8b(q=0)
8898 Bool isTBX = bitOP == 1;
8899 /* The out-of-range values to use. */
8900 IRTemp oor_values = newTempV128();
8901 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
8902 /* src value */
8903 IRTemp src = newTempV128();
8904 assign(src, getQReg128(mm));
8905 /* The table values */
8906 IRTemp tab[4];
8907 UInt i;
8908 for (i = 0; i <= len; i++) {
8909 vassert(i < 4);
8910 tab[i] = newTempV128();
8911 assign(tab[i], getQReg128((nn + i) % 32));
8913 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
8914 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8915 const HChar* Ta = bitQ == 1 ? "16b" : "8b";
8916 const HChar* nm = isTBX ? "tbx" : "tbl";
8917 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
8918 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8919 return True;
8922 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8923 return False;
8924 # undef INSN
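/* Editorial note (not part of the original file): the architectural
   behaviour that math_TBL_TBX has to produce is, per result byte,

      idx = Vm.byte[i];
      Vd.byte[i] = idx < 16 * (len+1) ? table.byte[idx] : oor_byte;

   where the table is the byte concatenation of the 1..4 registers
   Vn .. V(n+len) (mod 32), and oor_byte is 0 for TBL but the existing
   destination byte for TBX -- which is why |oor_values| is seeded from
   Vd in the TBX case above. */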
8928 static
8929 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8931 /* 31 29 23 21 20 15 14 11 9 4
8932 0 q 001110 size 0 m 0 opcode 10 n d
8933 Decode fields: opcode
8935 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8936 if (INSN(31,31) != 0
8937 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8938 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8939 return False;
8941 UInt bitQ = INSN(30,30);
8942 UInt size = INSN(23,22);
8943 UInt mm = INSN(20,16);
8944 UInt opcode = INSN(14,12);
8945 UInt nn = INSN(9,5);
8946 UInt dd = INSN(4,0);
8948 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8949 /* -------- 001 UZP1 std7_std7_std7 -------- */
8950 /* -------- 101 UZP2 std7_std7_std7 -------- */
8951 if (bitQ == 0 && size == X11) return False; // implied 1d case
8952 Bool isUZP1 = opcode == BITS3(0,0,1);
8953 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
8954 : mkVecCATODDLANES(size);
8955 IRTemp preL = newTempV128();
8956 IRTemp preR = newTempV128();
8957 IRTemp res = newTempV128();
8958 if (bitQ == 0) {
8959 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8960 getQReg128(nn)));
8961 assign(preR, mkexpr(preL));
8962 } else {
8963 assign(preL, getQReg128(mm));
8964 assign(preR, getQReg128(nn));
8966 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8967 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8968 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
8969 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8970 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8971 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8972 return True;
8975 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8976 /* -------- 010 TRN1 std7_std7_std7 -------- */
8977 /* -------- 110 TRN2 std7_std7_std7 -------- */
8978 if (bitQ == 0 && size == X11) return False; // implied 1d case
8979 Bool isTRN1 = opcode == BITS3(0,1,0);
8980 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
8981 : mkVecCATODDLANES(size);
8982 IROp op2 = mkVecINTERLEAVEHI(size);
8983 IRTemp srcM = newTempV128();
8984 IRTemp srcN = newTempV128();
8985 IRTemp res = newTempV128();
8986 assign(srcM, getQReg128(mm));
8987 assign(srcN, getQReg128(nn));
8988 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8989 binop(op1, mkexpr(srcN), mkexpr(srcN))));
8990 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8991 const HChar* nm = isTRN1 ? "trn1" : "trn2";
8992 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8993 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8994 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8995 return True;
8998 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8999 /* -------- 011 ZIP1 std7_std7_std7 -------- */
9000 /* -------- 111 ZIP2 std7_std7_std7 -------- */
9001 if (bitQ == 0 && size == X11) return False; // implied 1d case
9002 Bool isZIP1 = opcode == BITS3(0,1,1);
9003 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
9004 : mkVecINTERLEAVEHI(size);
9005 IRTemp preL = newTempV128();
9006 IRTemp preR = newTempV128();
9007 IRTemp res = newTempV128();
9008 if (bitQ == 0 && !isZIP1) {
9009 IRTemp z128 = newTempV128();
9010 assign(z128, mkV128(0x0000));
9011 // preL = Vm shifted left 32 bits
9012 // preR = Vn shifted left 32 bits
9013 assign(preL, triop(Iop_SliceV128,
9014 getQReg128(mm), mkexpr(z128), mkU8(12)));
9015 assign(preR, triop(Iop_SliceV128,
9016 getQReg128(nn), mkexpr(z128), mkU8(12)));
9018 } else {
9019 assign(preL, getQReg128(mm));
9020 assign(preR, getQReg128(nn));
9022 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
9023 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9024 const HChar* nm = isZIP1 ? "zip1" : "zip2";
9025 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9026 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9027 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9028 return True;
9031 return False;
9032 # undef INSN
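/* Editorial note (not part of the original file): for reference, the
   lane shuffles produced above, shown for the 4s arrangement with
   n = [n3 n2 n1 n0] and m = [m3 m2 m1 m0] (high lane first):

      uzp1 -> [m2 m0 n2 n0]      uzp2 -> [m3 m1 n3 n1]
      trn1 -> [m2 n2 m0 n0]      trn2 -> [m3 n3 m1 n1]
      zip1 -> [m1 n1 m0 n0]      zip2 -> [m3 n3 m2 n2]

   The 64-bit (Q=0) arrangements behave the same on the low halves, with
   the upper half of the result zeroed. */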
9036 static
9037 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
9039 /* 31 28 23 21 16 11 9 4
9040 0 q u 01110 size 11000 opcode 10 n d
9041 Decode fields: u,size,opcode
9043 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9044 if (INSN(31,31) != 0
9045 || INSN(28,24) != BITS5(0,1,1,1,0)
9046 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
9047 return False;
9049 UInt bitQ = INSN(30,30);
9050 UInt bitU = INSN(29,29);
9051 UInt size = INSN(23,22);
9052 UInt opcode = INSN(16,12);
9053 UInt nn = INSN(9,5);
9054 UInt dd = INSN(4,0);
9056 if (opcode == BITS5(0,0,0,1,1)) {
9057 /* -------- 0,xx,00011 SADDLV -------- */
9058 /* -------- 1,xx,00011 UADDLV -------- */
9059 /* size is the narrow size */
9060 if (size == X11 || (size == X10 && bitQ == 0)) return False;
9061 Bool isU = bitU == 1;
9062 IRTemp src = newTempV128();
9063 assign(src, getQReg128(nn));
9064 /* The basic plan is to widen the lower half, and if Q = 1,
9065 the upper half too. Add them together (if Q = 1), and in
9066 either case fold with add at twice the lane width.
9068 IRExpr* widened
9069 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
9070 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
9071 if (bitQ == 1) {
9072 widened
9073 = binop(mkVecADD(size+1),
9074 widened,
9075 mkexpr(math_WIDEN_LO_OR_HI_LANES(
9076 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
9079 /* Now fold. */
9080 IRTemp tWi = newTempV128();
9081 assign(tWi, widened);
9082 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
9083 putQReg128(dd, mkexpr(res));
9084 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9085 const HChar ch = "bhsd"[size+1]; /* dest lane is the widened size */
9086 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
9087 nameQReg128(dd), ch, nameQReg128(nn), arr);
9088 return True;
9091 UInt ix = 0;
9092 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
9093 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
9094 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
9095 /**/
9096 if (ix != 0) {
9097 /* -------- 0,xx,01010: SMAXV -------- (1) */
9098 /* -------- 1,xx,01010: UMAXV -------- (2) */
9099 /* -------- 0,xx,11010: SMINV -------- (3) */
9100 /* -------- 1,xx,11010: UMINV -------- (4) */
9101 /* -------- 0,xx,11011: ADDV -------- (5) */
9102 vassert(ix >= 1 && ix <= 5);
9103 if (size == X11) return False; // 1d,2d cases not allowed
9104 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
9105 const IROp opMAXS[3]
9106 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
9107 const IROp opMAXU[3]
9108 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
9109 const IROp opMINS[3]
9110 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
9111 const IROp opMINU[3]
9112 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
9113 const IROp opADD[3]
9114 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
9115 vassert(size < 3);
9116 IROp op = Iop_INVALID;
9117 const HChar* nm = NULL;
9118 switch (ix) {
9119 case 1: op = opMAXS[size]; nm = "smaxv"; break;
9120 case 2: op = opMAXU[size]; nm = "umaxv"; break;
9121 case 3: op = opMINS[size]; nm = "sminv"; break;
9122 case 4: op = opMINU[size]; nm = "uminv"; break;
9123 case 5: op = opADD[size]; nm = "addv"; break;
9124 default: vassert(0);
9126 vassert(op != Iop_INVALID && nm != NULL);
9127 IRTemp tN1 = newTempV128();
9128 assign(tN1, getQReg128(nn));
9129 /* If Q == 0, we're just folding lanes in the lower half of
9130 the value. In which case, copy the lower half of the
9131 source into the upper half, so we can then treat it the
9132 same as the full width case. Except for the addition case,
9133 in which we have to zero out the upper half. */
9134 IRTemp tN2 = newTempV128();
9135 assign(tN2, bitQ == 0
9136 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
9137 : mk_CatEvenLanes64x2(tN1,tN1))
9138 : mkexpr(tN1));
9139 IRTemp res = math_FOLDV(tN2, op);
9140 if (res == IRTemp_INVALID)
9141 return False; /* means math_FOLDV
9142 doesn't handle this case yet */
9143 putQReg128(dd, mkexpr(res));
9144 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
9145 IRType laneTy = tys[size];
9146 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9147 DIP("%s %s, %s.%s\n", nm,
9148 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
9149 return True;
9152 if ((size == X00 || size == X10)
9153 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9154 /* -------- 0,00,01100: FMAXNMV s_4s -------- */
9155 /* -------- 0,10,01100: FMINNMV s_4s -------- */
9156 /* -------- 1,00,01111: FMAXV s_4s -------- */
9157 /* -------- 1,10,01111: FMINV s_4s -------- */
9158 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9159 if (bitQ == 0) return False; // Only 4s is allowed
9160 Bool isMIN = (size & 2) == 2;
9161 Bool isNM = opcode == BITS5(0,1,1,0,0);
9162 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
9163 IRTemp src = newTempV128();
9164 assign(src, getQReg128(nn));
9165 IRTemp res = math_FOLDV(src, opMXX);
9166 putQReg128(dd, mkexpr(res));
9167 DIP("%s%sv s%u, v%u.4s\n",
9168 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
9169 return True;
9172 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9173 return False;
9174 # undef INSN
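/* Editorial note (not part of the original file): as a concrete instance
   of the widen-then-fold plan used for SADDLV/UADDLV above, consider
   "uaddlv h0, v1.16b": both 8-byte halves of v1 are zero-widened to
   eight 16-bit lanes each, added lane-wise, and the eight 16-bit sums
   are then folded together with pairwise 16-bit adds, leaving the total
   in the lowest lane of the destination. */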
9178 static
9179 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9181 /* 31 28 20 15 14 10 9 4
9182 0 q op 01110000 imm5 0 imm4 1 n d
9183 Decode fields: q,op,imm4
9185 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9186 if (INSN(31,31) != 0
9187 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
9188 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9189 return False;
9191 UInt bitQ = INSN(30,30);
9192 UInt bitOP = INSN(29,29);
9193 UInt imm5 = INSN(20,16);
9194 UInt imm4 = INSN(14,11);
9195 UInt nn = INSN(9,5);
9196 UInt dd = INSN(4,0);
9198 /* -------- x,0,0000: DUP (element, vector) -------- */
9199 /* 31 28 20 15 9 4
9200 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
9202 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9203 UInt laneNo = 0;
9204 UInt laneSzLg2 = 0;
9205 HChar laneCh = '?';
9206 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
9207 getQReg128(nn), imm5);
9208 if (res == IRTemp_INVALID)
9209 return False;
9210 if (bitQ == 0 && laneSzLg2 == X11)
9211 return False; /* .1d case */
9212 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9213 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
9214 DIP("dup %s.%s, %s.%c[%u]\n",
9215 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
9216 return True;
9219 /* -------- x,0,0001: DUP (general, vector) -------- */
9220 /* 31 28 20 15 9 4
9221 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
9222 Q=0 writes 64, Q=1 writes 128
9223 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
9224 xxx10 4H(q=0) or 8H(q=1), R=W
9225 xx100 2S(q=0) or 4S(q=1), R=W
9226 x1000 Invalid(q=0) or 2D(q=1), R=X
9227 x0000 Invalid(q=0) or Invalid(q=1)
9228 Require op=0, imm4=0001
9230 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
9231 Bool isQ = bitQ == 1;
9232 IRTemp w0 = newTemp(Ity_I64);
9233 const HChar* arT = "??";
9234 IRType laneTy = Ity_INVALID;
9235 if (imm5 & 1) {
9236 arT = isQ ? "16b" : "8b";
9237 laneTy = Ity_I8;
9238 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
9240 else if (imm5 & 2) {
9241 arT = isQ ? "8h" : "4h";
9242 laneTy = Ity_I16;
9243 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
9245 else if (imm5 & 4) {
9246 arT = isQ ? "4s" : "2s";
9247 laneTy = Ity_I32;
9248 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
9250 else if ((imm5 & 8) && isQ) {
9251 arT = "2d";
9252 laneTy = Ity_I64;
9253 assign(w0, getIReg64orZR(nn));
9255 else {
9256 /* invalid; leave laneTy unchanged. */
9258 /* */
9259 if (laneTy != Ity_INVALID) {
9260 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
9261 putQReg128(dd, binop(Iop_64HLtoV128,
9262 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
9263 DIP("dup %s.%s, %s\n",
9264 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
9265 return True;
9267 /* invalid */
9268 return False;
9271 /* -------- 1,0,0011: INS (general) -------- */
9272 /* 31 28 20 15 9 4
9273 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
9274 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
9275 xxx10 -> H, xxx
9276 xx100 -> S, xx
9277 x1000 -> D, x
9279 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
9280 HChar ts = '?';
9281 UInt laneNo = 16;
9282 IRExpr* src = NULL;
9283 if (imm5 & 1) {
9284 src = unop(Iop_64to8, getIReg64orZR(nn));
9285 laneNo = (imm5 >> 1) & 15;
9286 ts = 'b';
9288 else if (imm5 & 2) {
9289 src = unop(Iop_64to16, getIReg64orZR(nn));
9290 laneNo = (imm5 >> 2) & 7;
9291 ts = 'h';
9293 else if (imm5 & 4) {
9294 src = unop(Iop_64to32, getIReg64orZR(nn));
9295 laneNo = (imm5 >> 3) & 3;
9296 ts = 's';
9298 else if (imm5 & 8) {
9299 src = getIReg64orZR(nn);
9300 laneNo = (imm5 >> 4) & 1;
9301 ts = 'd';
9303 /* */
9304 if (src) {
9305 vassert(laneNo < 16);
9306 putQRegLane(dd, laneNo, src);
9307 DIP("ins %s.%c[%u], %s\n",
9308 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
9309 return True;
9311 /* invalid */
9312 return False;
9315 /* -------- x,0,0101: SMOV -------- */
9316 /* -------- x,0,0111: UMOV -------- */
9317 /* 31 28 20 15 9 4
9318 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
9319 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
9320 dest is Xd when q==1, Wd when q==0
9321 UMOV:
9322 Ts,index,ops = case q:imm5 of
9323 0:xxxx1 -> B, xxxx, 8Uto64
9324 1:xxxx1 -> invalid
9325 0:xxx10 -> H, xxx, 16Uto64
9326 1:xxx10 -> invalid
9327 0:xx100 -> S, xx, 32Uto64
9328 1:xx100 -> invalid
9329 1:x1000 -> D, x, copy64
9330 other -> invalid
9331 SMOV:
9332 Ts,index,ops = case q:imm5 of
9333 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
9334 1:xxxx1 -> B, xxxx, 8Sto64
9335 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
9336 1:xxx10 -> H, xxx, 16Sto64
9337 0:xx100 -> invalid
9338 1:xx100 -> S, xx, 32Sto64
9339 1:x1000 -> invalid
9340 other -> invalid
9342 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
9343 Bool isU = (imm4 & 2) == 2;
9344 const HChar* arTs = "??";
9345 UInt laneNo = 16; /* invalid */
9346 // Setting 'res' to non-NULL determines valid/invalid
9347 IRExpr* res = NULL;
9348 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
9349 laneNo = (imm5 >> 1) & 15;
9350 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9351 res = isU ? unop(Iop_8Uto64, lane)
9352 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
9353 arTs = "b";
9355 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
9356 laneNo = (imm5 >> 1) & 15;
9357 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9358 res = isU ? NULL
9359 : unop(Iop_8Sto64, lane);
9360 arTs = "b";
9362 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
9363 laneNo = (imm5 >> 2) & 7;
9364 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9365 res = isU ? unop(Iop_16Uto64, lane)
9366 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
9367 arTs = "h";
9369 else if (bitQ && (imm5 & 2)) { // 1:xxx10
9370 laneNo = (imm5 >> 2) & 7;
9371 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9372 res = isU ? NULL
9373 : unop(Iop_16Sto64, lane);
9374 arTs = "h";
9376 else if (!bitQ && (imm5 & 4)) { // 0:xx100
9377 laneNo = (imm5 >> 3) & 3;
9378 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9379 res = isU ? unop(Iop_32Uto64, lane)
9380 : NULL;
9381 arTs = "s";
9383 else if (bitQ && (imm5 & 4)) { // 1:xx100
9384 laneNo = (imm5 >> 3) & 3;
9385 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9386 res = isU ? NULL
9387 : unop(Iop_32Sto64, lane);
9388 arTs = "s";
9390 else if (bitQ && (imm5 & 8)) { // 1:x1000
9391 laneNo = (imm5 >> 4) & 1;
9392 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
9393 res = isU ? lane
9394 : NULL;
9395 arTs = "d";
9397 /* */
9398 if (res) {
9399 vassert(laneNo < 16);
9400 putIReg64orZR(dd, res);
9401 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
9402 nameIRegOrZR(bitQ == 1, dd),
9403 nameQReg128(nn), arTs, laneNo);
9404 return True;
9406 /* invalid */
9407 return False;
9410 /* -------- 1,1,xxxx: INS (element) -------- */
9411 /* 31 28 20 14 9 4
9412 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
9413 where Ts,ix1,ix2
9414 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
9415 xxx10 -> H, xxx, imm4[3:1]
9416 xx100 -> S, xx, imm4[3:2]
9417 x1000 -> D, x, imm4[3:3]
9419 if (bitQ == 1 && bitOP == 1) {
9420 HChar ts = '?';
9421 IRType ity = Ity_INVALID;
9422 UInt ix1 = 16;
9423 UInt ix2 = 16;
9424 if (imm5 & 1) {
9425 ts = 'b';
9426 ity = Ity_I8;
9427 ix1 = (imm5 >> 1) & 15;
9428 ix2 = (imm4 >> 0) & 15;
9430 else if (imm5 & 2) {
9431 ts = 'h';
9432 ity = Ity_I16;
9433 ix1 = (imm5 >> 2) & 7;
9434 ix2 = (imm4 >> 1) & 7;
9436 else if (imm5 & 4) {
9437 ts = 's';
9438 ity = Ity_I32;
9439 ix1 = (imm5 >> 3) & 3;
9440 ix2 = (imm4 >> 2) & 3;
9442 else if (imm5 & 8) {
9443 ts = 'd';
9444 ity = Ity_I64;
9445 ix1 = (imm5 >> 4) & 1;
9446 ix2 = (imm4 >> 3) & 1;
9448 /* */
9449 if (ity != Ity_INVALID) {
9450 vassert(ix1 < 16);
9451 vassert(ix2 < 16);
9452 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
9453 DIP("ins %s.%c[%u], %s.%c[%u]\n",
9454 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
9455 return True;
9457 /* invalid */
9458 return False;
9461 return False;
9462 # undef INSN
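/* Editorial note (not part of the original file): the imm5 decodings in
   the copy group above all follow one rule -- the position of the lowest
   set bit of imm5 selects the lane size (bit 0 -> B, bit 1 -> H,
   bit 2 -> S, bit 3 -> D) and the bits above that position give the lane
   index.  E.g. imm5 = 01110 selects an H lane with index 01110 >> 2 = 3,
   so "ins v2.h[3], w7" is encoded with imm5 = 01110. */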
9466 static
9467 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
9469 /* 31 28 18 15 11 9 4
9470 0q op 01111 00000 abc cmode 01 defgh d
9471 Decode fields: q,op,cmode
9472 Bit 11 is really "o2", but it is always zero.
9474 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9475 if (INSN(31,31) != 0
9476 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
9477 || INSN(11,10) != BITS2(0,1)) {
9478 return False;
9480 UInt bitQ = INSN(30,30);
9481 UInt bitOP = INSN(29,29);
9482 UInt cmode = INSN(15,12);
9483 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
9484 UInt dd = INSN(4,0);
9486 ULong imm64lo = 0;
9487 UInt op_cmode = (bitOP << 4) | cmode;
9488 Bool ok = False;
9489 Bool isORR = False;
9490 Bool isBIC = False;
9491 Bool isMOV = False;
9492 Bool isMVN = False;
9493 Bool isFMOV = False;
9494 switch (op_cmode) {
9495 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
9496 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
9497 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9498 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9499 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9500 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9501 ok = True; isMOV = True; break;
9503 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9504 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9505 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9506 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9507 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9508 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9509 ok = True; isORR = True; break;
9511 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9512 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9513 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9514 ok = True; isMOV = True; break;
9516 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9517 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9518 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9519 ok = True; isORR = True; break;
9521 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9522 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9523 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9524 ok = True; isMOV = True; break;
9526 /* -------- x,0,1110 MOVI 8-bit -------- */
9527 case BITS5(0,1,1,1,0):
9528 ok = True; isMOV = True; break;
9530 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9531 case BITS5(0,1,1,1,1): // 0:1111
9532 ok = True; isFMOV = True; break;
9534 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9535 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
9536 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
9537 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
9538 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9539 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9540 ok = True; isMVN = True; break;
9542 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9543 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9544 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9545 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9546 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9547 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9548 ok = True; isBIC = True; break;
9550 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9551 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9552 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
9553 ok = True; isMVN = True; break;
9555 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
9556 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
9557 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
9558 ok = True; isBIC = True; break;
9560 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
9561 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
9562 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
9563 ok = True; isMVN = True; break;
9565 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
9566 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
9567 case BITS5(1,1,1,1,0):
9568 ok = True; isMOV = True; break;
9570 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
9571 case BITS5(1,1,1,1,1): // 1:1111
9572 ok = bitQ == 1; isFMOV = True; break;
9574 default:
9575 break;
9577 if (ok) {
9578 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
9579 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
9580 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
9582 if (ok) {
9583 if (isORR || isBIC) {
9584 ULong inv
9585 = isORR ? 0ULL : ~0ULL;
9586 IRExpr* immV128
9587 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
9588 IRExpr* res
9589 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
9590 const HChar* nm = isORR ? "orr" : "bic";
9591 if (bitQ == 0) {
9592 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
9593 DIP("%s %s.1d, #0x%016llx\n", nm, nameQReg128(dd), imm64lo);
9594 } else {
9595 putQReg128(dd, res);
9596 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
9597 nameQReg128(dd), imm64lo, imm64lo);
9600 else if (isMOV || isMVN || isFMOV) {
9601 if (isMVN) imm64lo = ~imm64lo;
9602 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
9603 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
9604 mkU64(imm64lo));
9605 putQReg128(dd, immV128);
9606 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
9608 return True;
9610 /* else fall through */
9612 return False;
9613 # undef INSN
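/* Editorial note (not part of the original file): AdvSIMDExpandImm
   produces the 64-bit replicated constant from op:cmode:abcdefgh.  As
   one example of the 32-bit shifted-immediate forms handled above,
   cmode<2:1> selects which byte of each 32-bit lane receives the
   immediate, so op=0, cmode=0010, abcdefgh=0xAB expands to
   0x0000AB000000AB00; the MOVI/MVNI/ORR/BIC cases above then move,
   invert, OR in, or AND with the complement of that pattern
   respectively. */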
9617 static
9618 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9620 /* 31 28 20 15 14 10 9 4
9621 01 op 11110000 imm5 0 imm4 1 n d
9622 Decode fields: op,imm4
9624 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9625 if (INSN(31,30) != BITS2(0,1)
9626 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9627 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9628 return False;
9630 UInt bitOP = INSN(29,29);
9631 UInt imm5 = INSN(20,16);
9632 UInt imm4 = INSN(14,11);
9633 UInt nn = INSN(9,5);
9634 UInt dd = INSN(4,0);
9636 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9637 /* -------- 0,0000 DUP (element, scalar) -------- */
9638 IRTemp w0 = newTemp(Ity_I64);
9639 const HChar* arTs = "??";
9640 IRType laneTy = Ity_INVALID;
9641 UInt laneNo = 16; /* invalid */
9642 if (imm5 & 1) {
9643 arTs = "b";
9644 laneNo = (imm5 >> 1) & 15;
9645 laneTy = Ity_I8;
9646 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9648 else if (imm5 & 2) {
9649 arTs = "h";
9650 laneNo = (imm5 >> 2) & 7;
9651 laneTy = Ity_I16;
9652 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9654 else if (imm5 & 4) {
9655 arTs = "s";
9656 laneNo = (imm5 >> 3) & 3;
9657 laneTy = Ity_I32;
9658 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9660 else if (imm5 & 8) {
9661 arTs = "d";
9662 laneNo = (imm5 >> 4) & 1;
9663 laneTy = Ity_I64;
9664 assign(w0, getQRegLane(nn, laneNo, laneTy));
9666 else {
9667 /* invalid; leave laneTy unchanged. */
9669 /* */
9670 if (laneTy != Ity_INVALID) {
9671 vassert(laneNo < 16);
9672 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9673 DIP("dup %s, %s.%s[%u]\n",
9674 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9675 return True;
9677 /* else fall through */
9680 return False;
9681 # undef INSN
9685 static
9686 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9688 /* 31 28 23 21 16 11 9 4
9689 01 u 11110 sz 11000 opcode 10 n d
9690 Decode fields: u,sz,opcode
9692 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9693 if (INSN(31,30) != BITS2(0,1)
9694 || INSN(28,24) != BITS5(1,1,1,1,0)
9695 || INSN(21,17) != BITS5(1,1,0,0,0)
9696 || INSN(11,10) != BITS2(1,0)) {
9697 return False;
9699 UInt bitU = INSN(29,29);
9700 UInt sz = INSN(23,22);
9701 UInt opcode = INSN(16,12);
9702 UInt nn = INSN(9,5);
9703 UInt dd = INSN(4,0);
9705 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9706 /* -------- 0,11,11011 ADDP d_2d -------- */
9707 IRTemp xy = newTempV128();
9708 IRTemp xx = newTempV128();
9709 assign(xy, getQReg128(nn));
9710 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9711 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9712 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9713 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9714 return True;
9717 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9718 /* -------- 1,00,01101 FADDP s_2s -------- */
9719 /* -------- 1,01,01101 FADDP d_2d -------- */
9720 Bool isD = sz == X01;
9721 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9722 IROp opADD = mkVecADDF(isD ? 3 : 2);
9723 IRTemp src = newTempV128();
9724 IRTemp argL = newTempV128();
9725 IRTemp argR = newTempV128();
9726 assign(src, getQReg128(nn));
9727 assign(argL, unop(opZHI, mkexpr(src)));
9728 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9729 mkU8(isD ? 8 : 4))));
9730 putQReg128(dd, unop(opZHI,
9731 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9732 mkexpr(argL), mkexpr(argR))));
9733 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9734 return True;
9737 if (bitU == 1
9738 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9739 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9740 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9741 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9742 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9743 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9744 Bool isD = (sz & 1) == 1;
9745 Bool isMIN = (sz & 2) == 2;
9746 Bool isNM = opcode == BITS5(0,1,1,0,0);
9747 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9748 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9749 IRTemp src = newTempV128();
9750 IRTemp argL = newTempV128();
9751 IRTemp argR = newTempV128();
9752 assign(src, getQReg128(nn));
9753 assign(argL, unop(opZHI, mkexpr(src)));
9754 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9755 mkU8(isD ? 8 : 4))));
9756 putQReg128(dd, unop(opZHI,
9757 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9758 HChar c = isD ? 'd' : 's';
9759 DIP("%s%sp %c%u, v%u.2%c\n",
9760 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9761 return True;
9764 return False;
9765 # undef INSN
9769 static
9770 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9772 /* 31 28 22 18 15 10 9 4
9773 01 u 111110 immh immb opcode 1 n d
9774 Decode fields: u,immh,opcode
9776 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9777 if (INSN(31,30) != BITS2(0,1)
9778 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9779 return False;
9781 UInt bitU = INSN(29,29);
9782 UInt immh = INSN(22,19);
9783 UInt immb = INSN(18,16);
9784 UInt opcode = INSN(15,11);
9785 UInt nn = INSN(9,5);
9786 UInt dd = INSN(4,0);
9787 UInt immhb = (immh << 3) | immb;
9789 if ((immh & 8) == 8
9790 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9791 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9792 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9793 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9794 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9795 Bool isU = bitU == 1;
9796 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9797 UInt sh = 128 - immhb;
9798 vassert(sh >= 1 && sh <= 64);
9799 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9800 IRExpr* src = getQReg128(nn);
9801 IRTemp shf = newTempV128();
9802 IRTemp res = newTempV128();
9803 if (sh == 64 && isU) {
9804 assign(shf, mkV128(0x0000));
9805 } else {
9806 UInt nudge = 0;
9807 if (sh == 64) {
9808 vassert(!isU);
9809 nudge = 1;
9811 assign(shf, binop(op, src, mkU8(sh - nudge)));
9813 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9814 : mkexpr(shf));
9815 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9816 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9817 : (isU ? "ushr" : "sshr");
9818 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9819 return True;
9822 if ((immh & 8) == 8
9823 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9824 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9825 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9826 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9827 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9828 Bool isU = bitU == 1;
9829 Bool isAcc = opcode == BITS5(0,0,1,1,0);
9830 UInt sh = 128 - immhb;
9831 vassert(sh >= 1 && sh <= 64);
9832 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9833 vassert(sh >= 1 && sh <= 64);
9834 IRExpr* src = getQReg128(nn);
9835 IRTemp imm8 = newTemp(Ity_I8);
9836 assign(imm8, mkU8((UChar)(-sh)));
9837 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
9838 IRTemp shf = newTempV128();
9839 IRTemp res = newTempV128();
9840 assign(shf, binop(op, src, amt));
9841 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9842 : mkexpr(shf));
9843 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9844 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9845 : (isU ? "urshr" : "srshr");
9846 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9847 return True;
9850 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9851 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9852 UInt sh = 128 - immhb;
9853 vassert(sh >= 1 && sh <= 64);
9854 if (sh == 64) {
9855 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9856 } else {
9857 /* sh is in range 1 .. 63 */
9858 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
9859 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9860 IRTemp res = newTempV128();
9861 assign(res, binop(Iop_OrV128,
9862 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9863 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9864 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9866 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9867 return True;
9870 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9871 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9872 UInt sh = immhb - 64;
9873 vassert(sh >= 0 && sh < 64);
9874 putQReg128(dd,
9875 unop(Iop_ZeroHI64ofV128,
9876 sh == 0 ? getQReg128(nn)
9877 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9878 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9879 return True;
9882 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9883 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9884 UInt sh = immhb - 64;
9885 vassert(sh >= 0 && sh < 64);
9886 if (sh == 0) {
9887 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9888 } else {
9889 /* sh is in range 1 .. 63 */
9890 ULong nmask = (1ULL << sh) - 1;
9891 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9892 IRTemp res = newTempV128();
9893 assign(res, binop(Iop_OrV128,
9894 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9895 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9896 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9898 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9899 return True;
9902 if (opcode == BITS5(0,1,1,1,0)
9903 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9904 /* -------- 0,01110 SQSHL #imm -------- */
9905 /* -------- 1,01110 UQSHL #imm -------- */
9906 /* -------- 1,01100 SQSHLU #imm -------- */
9907 UInt size = 0;
9908 UInt shift = 0;
9909 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9910 if (!ok) return False;
9911 vassert(size >= 0 && size <= 3);
9912 /* The shift encoding has opposite sign for the leftwards case.
9913 Adjust shift to compensate. */
9914 UInt lanebits = 8 << size;
9915 shift = lanebits - shift;
9916 vassert(shift >= 0 && shift < lanebits);
9917 const HChar* nm = NULL;
9918 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9919 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9920 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9921 else vassert(0);
9922 IRTemp qDiff1 = IRTemp_INVALID;
9923 IRTemp qDiff2 = IRTemp_INVALID;
9924 IRTemp res = IRTemp_INVALID;
9925 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9926 /* This relies on the fact that the zeroed out lanes generate zeroed
9927 result lanes and don't saturate, so there's no point in trimming
9928 the resulting res, qDiff1 or qDiff2 values. */
9929 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9930 putQReg128(dd, mkexpr(res));
9931 updateQCFLAGwithDifference(qDiff1, qDiff2);
9932 const HChar arr = "bhsd"[size];
9933 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9934 return True;
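/* Editorial note (not part of the original file): the "opposite sign"
   adjustment above reflects how immh:immb encodes shift amounts in this
   group: right-shift forms use (2 * lanebits) - immh:immb, left-shift
   forms (SHL, SLI, SQSHL, UQSHL, SQSHLU) use immh:immb - lanebits.  So
   with 16-bit lanes, immh:immb = 0b0010011 (19) means a right shift by
   32 - 19 = 13 for the former and a left shift by 19 - 16 = 3 for the
   latter. */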
9937 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9938 || (bitU == 1
9939 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9940 /* -------- 0,10010 SQSHRN #imm -------- */
9941 /* -------- 1,10010 UQSHRN #imm -------- */
9942 /* -------- 0,10011 SQRSHRN #imm -------- */
9943 /* -------- 1,10011 UQRSHRN #imm -------- */
9944 /* -------- 1,10000 SQSHRUN #imm -------- */
9945 /* -------- 1,10001 SQRSHRUN #imm -------- */
9946 UInt size = 0;
9947 UInt shift = 0;
9948 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9949 if (!ok || size == X11) return False;
9950 vassert(size >= X00 && size <= X10);
9951 vassert(shift >= 1 && shift <= (8 << size));
9952 const HChar* nm = "??";
9953 IROp op = Iop_INVALID;
9954 /* Decide on the name and the operation. */
9955 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9956 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9958 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9959 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9961 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9962 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9964 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9965 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9967 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9968 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9970 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9971 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9973 else vassert(0);
9974 /* Compute the result (Q, shifted value) pair. */
9975 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9976 IRTemp pair = newTempV128();
9977 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
9978 /* Update the result reg */
9979 IRTemp res64in128 = newTempV128();
9980 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9981 putQReg128(dd, mkexpr(res64in128));
9982 /* Update the Q flag. */
9983 IRTemp q64q64 = newTempV128();
9984 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9985 IRTemp z128 = newTempV128();
9986 assign(z128, mkV128(0x0000));
9987 updateQCFLAGwithDifference(q64q64, z128);
9988 /* */
9989 const HChar arrNarrow = "bhsd"[size];
9990 const HChar arrWide = "bhsd"[size+1];
9991 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9992 return True;
9995 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9996 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9997 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
9998 UInt size = 0;
9999 UInt fbits = 0;
10000 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10001 /* The following holds because immh is never zero. */
10002 vassert(ok);
10003 /* The following holds because immh >= 0100. */
10004 vassert(size == X10 || size == X11);
10005 Bool isD = size == X11;
10006 Bool isU = bitU == 1;
10007 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10008 Double scale = two_to_the_minus(fbits);
10009 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10010 : IRExpr_Const(IRConst_F32( (Float)scale ));
10011 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10012 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10013 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10014 IRType tyF = isD ? Ity_F64 : Ity_F32;
10015 IRType tyI = isD ? Ity_I64 : Ity_I32;
10016 IRTemp src = newTemp(tyI);
10017 IRTemp res = newTemp(tyF);
10018 IRTemp rm = mk_get_IR_rounding_mode();
10019 assign(src, getQRegLane(nn, 0, tyI));
10020 assign(res, triop(opMUL, mkexpr(rm),
10021 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
10022 putQRegLane(dd, 0, mkexpr(res));
10023 if (!isD) {
10024 putQRegLane(dd, 1, mkU32(0));
10025 }
10026 putQRegLane(dd, 1, mkU64(0));
10027 const HChar ch = isD ? 'd' : 's';
10028 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
10029 ch, dd, ch, nn, fbits);
10030 return True;
10033 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
10034 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
10035 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
10036 UInt size = 0;
10037 UInt fbits = 0;
10038 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10039 /* The following holds because immh is never zero. */
10040 vassert(ok);
10041 /* The following holds because immh >= 0100. */
10042 vassert(size == X10 || size == X11);
10043 Bool isD = size == X11;
10044 Bool isU = bitU == 1;
10045 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10046 Double scale = two_to_the_plus(fbits);
10047 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10048 : IRExpr_Const(IRConst_F32( (Float)scale ));
10049 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10050 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
10051 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
10052 IRType tyF = isD ? Ity_F64 : Ity_F32;
10053 IRType tyI = isD ? Ity_I64 : Ity_I32;
10054 IRTemp src = newTemp(tyF);
10055 IRTemp res = newTemp(tyI);
10056 IRTemp rm = newTemp(Ity_I32);
10057 assign(src, getQRegLane(nn, 0, tyF));
10058 assign(rm, mkU32(Irrm_ZERO));
10059 assign(res, binop(opCVT, mkexpr(rm),
10060 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
10061 putQRegLane(dd, 0, mkexpr(res));
10062 if (!isD) {
10063 putQRegLane(dd, 1, mkU32(0));
10064 }
10065 putQRegLane(dd, 1, mkU64(0));
10066 const HChar ch = isD ? 'd' : 's';
10067 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
10068 ch, dd, ch, nn, fbits);
10069 return True;
10072 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10073 return False;
10074 # undef INSN
10078 static
10079 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
10081 /* 31 29 28 23 21 20 15 11 9 4
10082 01 U 11110 size 1 m opcode 00 n d
10083 Decode fields: u,opcode
10085 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10086 if (INSN(31,30) != BITS2(0,1)
10087 || INSN(28,24) != BITS5(1,1,1,1,0)
10088 || INSN(21,21) != 1
10089 || INSN(11,10) != BITS2(0,0)) {
10090 return False;
10092 UInt bitU = INSN(29,29);
10093 UInt size = INSN(23,22);
10094 UInt mm = INSN(20,16);
10095 UInt opcode = INSN(15,12);
10096 UInt nn = INSN(9,5);
10097 UInt dd = INSN(4,0);
10098 vassert(size < 4);
10100 if (bitU == 0
10101 && (opcode == BITS4(1,1,0,1)
10102 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
10103 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
10104 /* -------- 0,1001 SQDMLAL -------- */ // 1
10105 /* -------- 0,1011 SQDMLSL -------- */ // 2
10106 /* Widens, and size refers to the narrowed lanes. */
10107 UInt ks = 3;
10108 switch (opcode) {
10109 case BITS4(1,1,0,1): ks = 0; break;
10110 case BITS4(1,0,0,1): ks = 1; break;
10111 case BITS4(1,0,1,1): ks = 2; break;
10112 default: vassert(0);
10114 vassert(ks >= 0 && ks <= 2);
10115 if (size == X00 || size == X11) return False;
10116 vassert(size <= 2);
10117 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
10118 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10119 newTempsV128_3(&vecN, &vecM, &vecD);
10120 assign(vecN, getQReg128(nn));
10121 assign(vecM, getQReg128(mm));
10122 assign(vecD, getQReg128(dd));
10123 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10124 False/*!is2*/, size, "mas"[ks],
10125 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10126 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10127 putQReg128(dd, unop(opZHI, mkexpr(res)));
10128 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10129 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10130 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10131 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10133 const HChar* nm = ks == 0 ? "sqdmull"
10134 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10135 const HChar arrNarrow = "bhsd"[size];
10136 const HChar arrWide = "bhsd"[size+1];
10137 DIP("%s %c%u, %c%u, %c%u\n",
10138 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
10139 return True;
10142 return False;
10143 # undef INSN
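/* Editorial note (not part of the original file): in terms of values the
   three cases above compute, with the product widened to twice the
   source lane size and with saturation at each step,

      SQDMULL   d = sat(2 * n * m)
      SQDMLAL   d = sat(d + sat(2 * n * m))
      SQDMLSL   d = sat(d - sat(2 * n * m))

   which is why the accumulating forms need two QCFLAG updates: one for
   the doubling multiply and one for the accumulate. */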
10147 static
10148 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
10150 /* 31 29 28 23 21 20 15 10 9 4
10151 01 U 11110 size 1 m opcode 1 n d
10152 Decode fields: u,size,opcode
10154 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10155 if (INSN(31,30) != BITS2(0,1)
10156 || INSN(28,24) != BITS5(1,1,1,1,0)
10157 || INSN(21,21) != 1
10158 || INSN(10,10) != 1) {
10159 return False;
10161 UInt bitU = INSN(29,29);
10162 UInt size = INSN(23,22);
10163 UInt mm = INSN(20,16);
10164 UInt opcode = INSN(15,11);
10165 UInt nn = INSN(9,5);
10166 UInt dd = INSN(4,0);
10167 vassert(size < 4);
10169 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
10170 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
10171 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
10172 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
10173 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
10174 Bool isADD = opcode == BITS5(0,0,0,0,1);
10175 Bool isU = bitU == 1;
10176 IROp qop = Iop_INVALID;
10177 IROp nop = Iop_INVALID;
10178 if (isADD) {
10179 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
10180 nop = mkVecADD(size);
10181 } else {
10182 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
10183 nop = mkVecSUB(size);
10185 IRTemp argL = newTempV128();
10186 IRTemp argR = newTempV128();
10187 IRTemp qres = newTempV128();
10188 IRTemp nres = newTempV128();
10189 assign(argL, getQReg128(nn));
10190 assign(argR, getQReg128(mm));
10191 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10192 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
10193 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10194 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
10195 putQReg128(dd, mkexpr(qres));
10196 updateQCFLAGwithDifference(qres, nres);
10197 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
10198 : (isU ? "uqsub" : "sqsub");
10199 const HChar arr = "bhsd"[size];
10200 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10201 return True;
10204 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
10205 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
10206 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
10207 Bool isGT = bitU == 0;
10208 IRExpr* argL = getQReg128(nn);
10209 IRExpr* argR = getQReg128(mm);
10210 IRTemp res = newTempV128();
10211 assign(res,
10212 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10213 : binop(Iop_CmpGT64Ux2, argL, argR));
10214 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10215 DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
10216 nameQRegLO(dd, Ity_I64),
10217 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10218 return True;
10221 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
10222 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
10223 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
10224 Bool isGE = bitU == 0;
10225 IRExpr* argL = getQReg128(nn);
10226 IRExpr* argR = getQReg128(mm);
10227 IRTemp res = newTempV128();
10228 assign(res,
10229 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
10230 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
10231 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10232 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
10233 nameQRegLO(dd, Ity_I64),
10234 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10235 return True;
10238 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
10239 || opcode == BITS5(0,1,0,1,0))) {
10240 /* -------- 0,xx,01000 SSHL d_d_d -------- */
10241 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
10242 /* -------- 1,xx,01000 USHL d_d_d -------- */
10243 /* -------- 1,xx,01010 URSHL d_d_d -------- */
10244 Bool isU = bitU == 1;
10245 Bool isR = opcode == BITS5(0,1,0,1,0);
10246 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
10247 : (isU ? mkVecSHU(size) : mkVecSHS(size));
10248 IRTemp res = newTempV128();
10249 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10250 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10251 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
10252 : (isU ? "ushl" : "sshl");
10253 DIP("%s %s, %s, %s\n", nm,
10254 nameQRegLO(dd, Ity_I64),
10255 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10256 return True;
10259 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
10260 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
10261 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
10262 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
10263 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
10264 Bool isU = bitU == 1;
10265 Bool isR = opcode == BITS5(0,1,0,1,1);
10266 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
10267 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
10268 /* This is a bit tricky. Since we're only interested in the lowest
10269 lane of the result, we zero out all the rest in the operands, so
10270 as to ensure that other lanes don't pollute the returned Q value.
10271 This works because it means, for the lanes we don't care about, we
10272 are shifting zero by zero, which can never saturate. */
10273 IRTemp res256 = newTemp(Ity_V256);
10274 IRTemp resSH = newTempV128();
10275 IRTemp resQ = newTempV128();
10276 IRTemp zero = newTempV128();
10277 assign(
10278 res256,
10279 binop(op,
10280 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
10281 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
10282 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
10283 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
10284 assign(zero, mkV128(0x0000));
10285 putQReg128(dd, mkexpr(resSH));
10286 updateQCFLAGwithDifference(resQ, zero);
10287 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
10288 : (isU ? "uqshl" : "sqshl");
10289 const HChar arr = "bhsd"[size];
10290 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10291 return True;
10294 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
10295 /* -------- 0,11,10000 ADD d_d_d -------- */
10296 /* -------- 1,11,10000 SUB d_d_d -------- */
10297 Bool isSUB = bitU == 1;
10298 IRTemp res = newTemp(Ity_I64);
10299 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
10300 getQRegLane(nn, 0, Ity_I64),
10301 getQRegLane(mm, 0, Ity_I64)));
10302 putQRegLane(dd, 0, mkexpr(res));
10303 putQRegLane(dd, 1, mkU64(0));
10304 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
10305 nameQRegLO(dd, Ity_I64),
10306 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10307 return True;
10310 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
10311 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
10312 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
10313 Bool isEQ = bitU == 1;
10314 IRExpr* argL = getQReg128(nn);
10315 IRExpr* argR = getQReg128(mm);
10316 IRTemp res = newTempV128();
10317 assign(res,
10318 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10319 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
10320 binop(Iop_AndV128, argL, argR),
10321 mkV128(0x0000))));
10322 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10323 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
10324 nameQRegLO(dd, Ity_I64),
10325 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10326 return True;
10329 if (opcode == BITS5(1,0,1,1,0)) {
10330 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
10331 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
10332 if (size == X00 || size == X11) return False;
10333 Bool isR = bitU == 1;
10334 IRTemp res, sat1q, sat1n, vN, vM;
10335 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10336 newTempsV128_2(&vN, &vM);
10337 assign(vN, getQReg128(nn));
10338 assign(vM, getQReg128(mm));
10339 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10340 putQReg128(dd,
10341 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10342 updateQCFLAGwithDifference(
10343 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
10344 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
10345 const HChar arr = "bhsd"[size];
10346 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10347 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10348 return True;
10351 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
10352 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
10353 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
10354 IRTemp res = newTemp(ity);
10355 assign(res, unop(mkABSF(ity),
10356 triop(mkSUBF(ity),
10357 mkexpr(mk_get_IR_rounding_mode()),
10358 getQRegLO(nn,ity), getQRegLO(mm,ity))));
10359 putQReg128(dd, mkV128(0x0000));
10360 putQRegLO(dd, mkexpr(res));
10361 DIP("fabd %s, %s, %s\n",
10362 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10363 return True;
10366 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
10367 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
10368 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10369 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
10370 IRTemp res = newTemp(ity);
10371 assign(res, triop(mkMULF(ity),
10372 mkexpr(mk_get_IR_rounding_mode()),
10373 getQRegLO(nn,ity), getQRegLO(mm,ity)));
10374 putQReg128(dd, mkV128(0x0000));
10375 putQRegLO(dd, mkexpr(res));
10376 DIP("fmulx %s, %s, %s\n",
10377 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10378 return True;
10381 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
10382 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
10383 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
10384 Bool isD = size == X01;
10385 IRType ity = isD ? Ity_F64 : Ity_F32;
10386 Bool isGE = bitU == 1;
10387 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
10388 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
10389 IRTemp res = newTempV128();
10390 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
10391 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
10392 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10393 mkexpr(res))));
10394 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
10395 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10396 return True;
10399 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
10400 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
10401 Bool isD = size == X11;
10402 IRType ity = isD ? Ity_F64 : Ity_F32;
10403 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10404 IRTemp res = newTempV128();
10405 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
10406 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10407 mkexpr(res))));
10408 DIP("%s %s, %s, %s\n", "fcmgt",
10409 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10410 return True;
10413 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
10414 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
10415 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
10416 Bool isD = (size & 1) == 1;
10417 IRType ity = isD ? Ity_F64 : Ity_F32;
10418 Bool isGT = (size & 2) == 2;
10419 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
10420 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
10421 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
10422 IRTemp res = newTempV128();
10423 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
10424 unop(opABS, getQReg128(nn)))); // swapd
10425 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10426 mkexpr(res))));
10427 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
10428 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10429 return True;
10432 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
10433 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
10434 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
10435 Bool isSQRT = (size & 2) == 2;
10436 Bool isD = (size & 1) == 1;
10437 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
10438 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
10439 IRTemp res = newTempV128();
10440 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10441 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10442 mkexpr(res))));
10443 HChar c = isD ? 'd' : 's';
10444 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
10445 c, dd, c, nn, c, mm);
10446 return True;
10449 return False;
10450 # undef INSN
10453 static
10454 Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
10456 /* 31 29 28 23 21 20 15 10 9 4
10457 01 U 11110 size 0 m opcode 1 n d
10458 Decode fields: u,size,opcode
10460 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10461 if (INSN(31,30) != BITS2(0,1)
10462 || INSN(28,24) != BITS5(1,1,1,1,0)
10463 || INSN(21,21) != 0
10464 || INSN(10,10) != 1) {
10465 return False;
10467 UInt bitU = INSN(29,29);
10468 UInt size = INSN(23,22);
10469 UInt mm = INSN(20,16);
10470 UInt opcode = INSN(15,11);
10471 UInt nn = INSN(9,5);
10472 UInt dd = INSN(4,0);
10473 vassert(size < 4);
10474 vassert(mm < 32 && nn < 32 && dd < 32);
10476 if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10477 /* -------- xx,10000 SQRDMLAH s and h variants only -------- */
10478 /* -------- xx,10001 SQRDMLSH s and h variants only -------- */
10479 if (size == X00 || size == X11) return False;
10480 Bool isAdd = opcode == BITS5(1,0,0,0,0);
10482 IRTemp res, res_nosat, vD, vN, vM;
10483 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
10484 newTempsV128_3(&vD, &vN, &vM);
10485 assign(vD, getQReg128(dd));
10486 assign(vN, getQReg128(nn));
10487 assign(vM, getQReg128(mm));
10489 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
10490 putQReg128(dd,
10491 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10492 updateQCFLAGwithDifference(
10493 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)),
10494 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat)));
10496 const HChar arr = "bhsd"[size];
10497 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
10498 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10499 return True;
10502 return False;
10503 # undef INSN
10507 static
10508 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
10510 /* 31 29 28 23 21 16 11 9 4
10511 01 U 11110 size 10000 opcode 10 n d
10512 Decode fields: u,size,opcode
10514 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10515 if (INSN(31,30) != BITS2(0,1)
10516 || INSN(28,24) != BITS5(1,1,1,1,0)
10517 || INSN(21,17) != BITS5(1,0,0,0,0)
10518 || INSN(11,10) != BITS2(1,0)) {
10519 return False;
10521 UInt bitU = INSN(29,29);
10522 UInt size = INSN(23,22);
10523 UInt opcode = INSN(16,12);
10524 UInt nn = INSN(9,5);
10525 UInt dd = INSN(4,0);
10526 vassert(size < 4);
10528 if (opcode == BITS5(0,0,0,1,1)) {
10529 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
10530 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
10531 /* These are a bit tricky (to say the least). See comments on
10532 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
10533 details. */
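/* In outline: SUQADD adds the (unsigned) Rn element to the (signed) Rd
   element and saturates to the signed range; USQADD adds the (signed)
   Rn element to the (unsigned) Rd element and saturates to the unsigned
   range.  FPSR.QC is set below if saturation occurred. */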
10534 Bool isUSQADD = bitU == 1;
10535 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
10536 : mkVecQADDEXTUSSATSS(size);
10537 IROp nop = mkVecADD(size);
10538 IRTemp argL = newTempV128();
10539 IRTemp argR = newTempV128();
10540 assign(argL, getQReg128(nn));
10541 assign(argR, getQReg128(dd));
10542 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10543 size, binop(qop, mkexpr(argL), mkexpr(argR)));
10544 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10545 size, binop(nop, mkexpr(argL), mkexpr(argR)));
10546 putQReg128(dd, mkexpr(qres));
10547 updateQCFLAGwithDifference(qres, nres);
10548 const HChar arr = "bhsd"[size];
10549 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
10550 return True;
10553 if (opcode == BITS5(0,0,1,1,1)) {
10554 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
10555 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
10556 Bool isNEG = bitU == 1;
10557 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
10558 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
10559 getQReg128(nn), size );
10560 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
10561 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
10562 putQReg128(dd, mkexpr(qres));
10563 updateQCFLAGwithDifference(qres, nres);
10564 const HChar arr = "bhsd"[size];
10565 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
10566 return True;
10569 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
10570 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
10571 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
10572 Bool isGT = bitU == 0;
10573 IRExpr* argL = getQReg128(nn);
10574 IRExpr* argR = mkV128(0x0000);
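/* (x >=s 0) is the complement of (x <s 0), so CMGE is done as the NOT
   of the swapped-operand GT compare. */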
10575 IRTemp res = newTempV128();
10576 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10577 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
10578 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10579 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
10580 return True;
10583 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
10584 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
10585 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
10586 Bool isEQ = bitU == 0;
10587 IRExpr* argL = getQReg128(nn);
10588 IRExpr* argR = mkV128(0x0000);
10589 IRTemp res = newTempV128();
10590 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10591 : unop(Iop_NotV128,
10592 binop(Iop_CmpGT64Sx2, argL, argR)));
10593 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10594 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
10595 return True;
10598 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
10599 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
10600 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10601 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
10602 getQReg128(nn))));
10603 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
10604 return True;
10607 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10608 /* -------- 0,11,01011 ABS d_d -------- */
10609 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10610 unop(Iop_Abs64x2, getQReg128(nn))));
10611 DIP("abs d%u, d%u\n", dd, nn);
10612 return True;
10615 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10616 /* -------- 1,11,01011 NEG d_d -------- */
10617 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10618 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
10619 DIP("neg d%u, d%u\n", dd, nn);
10620 return True;
10623 UInt ix = 0; /*INVALID*/
10624 if (size >= X10) {
10625 switch (opcode) {
10626 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
10627 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
10628 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
10629 default: break;
10632 if (ix > 0) {
10633 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
10634 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
10635 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
10636 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
10637 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
10638 Bool isD = size == X11;
10639 IRType ity = isD ? Ity_F64 : Ity_F32;
10640 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
10641 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
10642 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10643 IROp opCmp = Iop_INVALID;
10644 Bool swap = False;
10645 const HChar* nm = "??";
10646 switch (ix) {
10647 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
10648 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
10649 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
10650 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
10651 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
10652 default: vassert(0);
10654 IRExpr* zero = mkV128(0x0000);
10655 IRTemp res = newTempV128();
10656 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
10657 : binop(opCmp, getQReg128(nn), zero));
10658 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10659 mkexpr(res))));
10661 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10662 return True;
10665 if (opcode == BITS5(1,0,1,0,0)
10666 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10667 /* -------- 0,xx,10100: SQXTN -------- */
10668 /* -------- 1,xx,10100: UQXTN -------- */
10669 /* -------- 1,xx,10010: SQXTUN -------- */
10670 if (size == X11) return False;
10671 vassert(size < 3);
10672 IROp opN = Iop_INVALID;
10673 Bool zWiden = True;
10674 const HChar* nm = "??";
10675 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10676 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10678 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10679 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10681 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10682 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10684 else vassert(0);
10685 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10686 size+1, getQReg128(nn));
10687 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10688 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10689 putQReg128(dd, mkexpr(resN));
10690 /* The non-participating lanes of |src| are zero and re-widen back to
10691 zero after narrowing, so when |src| is compared against the widened
10692 result below, those lanes make no contribution to the Q flag state. */
10693 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10694 size, mkexpr(resN));
10695 updateQCFLAGwithDifference(src, resW);
10696 const HChar arrNarrow = "bhsd"[size];
10697 const HChar arrWide = "bhsd"[size+1];
10698 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10699 return True;
10702 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10703 /* -------- 1,01,10110 FCVTXN s_d -------- */
10704 /* Using Irrm_NEAREST here isn't right. The docs say "round to
10705 odd" but I don't know what that really means. */
10706 putQRegLO(dd,
10707 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10708 getQRegLO(nn, Ity_F64)));
10709 putQRegLane(dd, 1, mkU32(0));
10710 putQRegLane(dd, 1, mkU64(0));
10711 DIP("fcvtxn s%u, d%u\n", dd, nn);
10712 return True;
10715 ix = 0; /*INVALID*/
10716 switch (opcode) {
10717 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10718 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10719 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10720 default: break;
10722 if (ix > 0) {
10723 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10724 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10725 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10726 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10727 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10728 /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10729 /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10730 /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10731 /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10732 /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10733 Bool isD = (size & 1) == 1;
10734 IRType tyF = isD ? Ity_F64 : Ity_F32;
10735 IRType tyI = isD ? Ity_I64 : Ity_I32;
10736 IRRoundingMode irrm = 8; /*impossible*/
10737 HChar ch = '?';
10738 switch (ix) {
10739 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10740 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10741 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
10742 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10743 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10744 default: vassert(0);
10746 IROp cvt = Iop_INVALID;
10747 if (bitU == 1) {
10748 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10749 } else {
10750 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10752 IRTemp src = newTemp(tyF);
10753 IRTemp res = newTemp(tyI);
10754 assign(src, getQRegLane(nn, 0, tyF));
10755 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10756 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10757 if (!isD) {
10758 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10760 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10761 HChar sOrD = isD ? 'd' : 's';
10762 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10763 sOrD, dd, sOrD, nn);
10764 return True;
10767 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10768 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10769 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10770 Bool isU = bitU == 1;
10771 Bool isD = (size & 1) == 1;
10772 IRType tyI = isD ? Ity_I64 : Ity_I32;
10773 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10774 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10775 IRTemp rm = mk_get_IR_rounding_mode();
10776 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10777 if (!isD) {
10778 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10780 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10781 HChar c = isD ? 'd' : 's';
10782 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10783 return True;
10786 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10787 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10788 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10789 Bool isSQRT = bitU == 1;
10790 Bool isD = (size & 1) == 1;
10791 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10792 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10793 IRTemp resV = newTempV128();
10794 assign(resV, unop(op, getQReg128(nn)));
10795 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10796 mkexpr(resV))));
10797 HChar c = isD ? 'd' : 's';
10798 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10799 return True;
10802 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10803 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10804 Bool isD = (size & 1) == 1;
10805 IRType ty = isD ? Ity_F64 : Ity_F32;
10806 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10807 IRTemp res = newTemp(ty);
10808 IRTemp rm = mk_get_IR_rounding_mode();
10809 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10810 putQReg128(dd, mkV128(0x0000));
10811 putQRegLane(dd, 0, mkexpr(res));
10812 HChar c = isD ? 'd' : 's';
10813 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10814 return True;
10817 return False;
10818 # undef INSN
10822 static
10823 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10825 /* 31 28 23 21 20 19 15 11 9 4
10826 01 U 11111 size L M m opcode H 0 n d
10827 Decode fields are: u,size,opcode
10828 M is really part of the mm register number. Individual
10829 cases need to inspect L and H though.
10831 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10832 if (INSN(31,30) != BITS2(0,1)
10833 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10834 return False;
10836 UInt bitU = INSN(29,29);
10837 UInt size = INSN(23,22);
10838 UInt bitL = INSN(21,21);
10839 UInt bitM = INSN(20,20);
10840 UInt mmLO4 = INSN(19,16);
10841 UInt opcode = INSN(15,12);
10842 UInt bitH = INSN(11,11);
10843 UInt nn = INSN(9,5);
10844 UInt dd = INSN(4,0);
10845 vassert(size < 4);
10846 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10848 if (bitU == 0 && size >= X10
10849 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10850 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10851 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10852 Bool isD = (size & 1) == 1;
10853 Bool isSUB = opcode == BITS4(0,1,0,1);
10854 UInt index;
10855 if (!isD) index = (bitH << 1) | bitL;
10856 else if (isD && bitL == 0) index = bitH;
10857 else return False; // sz:L == x11 => unallocated encoding
10858 vassert(index < (isD ? 2 : 4));
10859 IRType ity = isD ? Ity_F64 : Ity_F32;
10860 IRTemp elem = newTemp(ity);
10861 UInt mm = (bitM << 4) | mmLO4;
10862 assign(elem, getQRegLane(mm, index, ity));
10863 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10864 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10865 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10866 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10867 IRTemp rm = mk_get_IR_rounding_mode();
10868 IRTemp t1 = newTempV128();
10869 IRTemp t2 = newTempV128();
10870 // FIXME: double rounding; use FMA primops instead
10871 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10872 assign(t2, triop(isSUB ? opSUB : opADD,
10873 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10874 putQReg128(dd,
10875 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10876 mkexpr(t2))));
10877 const HChar c = isD ? 'd' : 's';
10878 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10879 c, dd, c, nn, nameQReg128(mm), c, index);
10880 return True;
10883 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10884 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
10885 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10886 Bool isD = (size & 1) == 1;
10887 Bool isMULX = bitU == 1;
10888 UInt index;
10889 if (!isD) index = (bitH << 1) | bitL;
10890 else if (isD && bitL == 0) index = bitH;
10891 else return False; // sz:L == x11 => unallocated encoding
10892 vassert(index < (isD ? 2 : 4));
10893 IRType ity = isD ? Ity_F64 : Ity_F32;
10894 IRTemp elem = newTemp(ity);
10895 UInt mm = (bitM << 4) | mmLO4;
10896 assign(elem, getQRegLane(mm, index, ity));
10897 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10898 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10899 IRTemp rm = mk_get_IR_rounding_mode();
10900 IRTemp t1 = newTempV128();
10901 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10902 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10903 putQReg128(dd,
10904 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10905 mkexpr(t1))));
10906 const HChar c = isD ? 'd' : 's';
10907 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10908 c, dd, c, nn, nameQReg128(mm), c, index);
10909 return True;
10912 if (bitU == 0
10913 && (opcode == BITS4(1,0,1,1)
10914 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10915 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10916 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10917 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10918 /* Widens, and size refers to the narrowed lanes. */
10919 UInt ks = 3;
10920 switch (opcode) {
10921 case BITS4(1,0,1,1): ks = 0; break;
10922 case BITS4(0,0,1,1): ks = 1; break;
10923 case BITS4(0,1,1,1): ks = 2; break;
10924 default: vassert(0);
10926 vassert(ks >= 0 && ks <= 2);
10927 UInt mm = 32; // invalid
10928 UInt ix = 16; // invalid
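/* For the h variant the element index is H:L:M (0..7) and only the
   4-bit mmLO4 names the register (V0..V15); for the s variant the index
   is H:L (0..3) and M becomes bit 4 of the register number (V0..V31). */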
10929 switch (size) {
10930 case X00:
10931 return False; // h_b_b[] case is not allowed
10932 case X01:
10933 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10934 case X10:
10935 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10936 case X11:
10937 return False; // q_d_d[] case is not allowed
10938 default:
10939 vassert(0);
10941 vassert(mm < 32 && ix < 16);
10942 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10943 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10944 newTempsV128_2(&vecN, &vecD);
10945 assign(vecN, getQReg128(nn));
10946 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10947 assign(vecD, getQReg128(dd));
10948 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10949 False/*!is2*/, size, "mas"[ks],
10950 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10951 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10952 putQReg128(dd, unop(opZHI, mkexpr(res)));
10953 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10954 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10955 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10956 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10958 const HChar* nm = ks == 0 ? "sqdmull"
10959 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10960 const HChar arrNarrow = "bhsd"[size];
10961 const HChar arrWide = "bhsd"[size+1];
10962 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
10963 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10964 return True;
10967 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
10968 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10969 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10970 UInt mm = 32; // invalid
10971 UInt ix = 16; // invalid
10972 switch (size) {
10973 case X00:
10974 return False; // b case is not allowed
10975 case X01:
10976 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10977 case X10:
10978 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10979 case X11:
10980 return False; // q case is not allowed
10981 default:
10982 vassert(0);
10984 vassert(mm < 32 && ix < 16);
10985 Bool isR = opcode == BITS4(1,1,0,1);
10986 IRTemp res, sat1q, sat1n, vN, vM;
10987 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10988 vN = newTempV128();
10989 assign(vN, getQReg128(nn));
10990 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10991 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10992 IROp opZHI = mkVecZEROHIxxOFV128(size);
10993 putQReg128(dd, unop(opZHI, mkexpr(res)));
10994 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10995 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10996 HChar ch = size == X01 ? 'h' : 's';
10997 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
10998 return True;
11001 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
11002 /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
11003 /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
11004 UInt mm = 32; // invalid
11005 UInt ix = 16; // invalid
11006 switch (size) {
11007 case X00:
11008 return False; // b case is not allowed
11009 case X01:
11010 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11011 case X10:
11012 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11013 case X11:
11014 return False; // d case is not allowed
11015 default:
11016 vassert(0);
11018 vassert(size < 4);
11019 vassert(mm < 32 && ix < 16);
11020 Bool isAdd = opcode == BITS4(1,1,0,1);
11022 IRTemp res, res_nosat, vD, vN, vM;
11023 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
11024 newTempsV128_2(&vD, &vN);
11025 assign(vD, getQReg128(dd));
11026 assign(vN, getQReg128(nn));
11027 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11029 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
11030 IROp opZHI = mkVecZEROHIxxOFV128(size);
11031 putQReg128(dd, unop(opZHI, mkexpr(res)));
11032 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
11034 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
11035 HChar ch = size == X01 ? 'h' : 's';
11036 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
11037 return True;
11040 return False;
11041 # undef INSN
11045 static
11046 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
11048 /* 31 28 22 18 15 10 9 4
11049 0 q u 011110 immh immb opcode 1 n d
11050 Decode fields: u,opcode
11052 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11053 if (INSN(31,31) != 0
11054 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
11055 return False;
11057 UInt bitQ = INSN(30,30);
11058 UInt bitU = INSN(29,29);
11059 UInt immh = INSN(22,19);
11060 UInt immb = INSN(18,16);
11061 UInt opcode = INSN(15,11);
11062 UInt nn = INSN(9,5);
11063 UInt dd = INSN(4,0);
11065 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
11066 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
11067 /* -------- 1,00000 USHR std7_std7_#imm -------- */
11068 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
11069 /* -------- 1,00010 USRA std7_std7_#imm -------- */
11070 /* laneTy, shift = case immh:immb of
11071 0001:xxx -> B, SHR:8-xxx
11072 001x:xxx -> H, SHR:16-xxxx
11073 01xx:xxx -> S, SHR:32-xxxxx
11074 1xxx:xxx -> D, SHR:64-xxxxxx
11075 other -> invalid
11077 UInt size = 0;
11078 UInt shift = 0;
11079 Bool isQ = bitQ == 1;
11080 Bool isU = bitU == 1;
11081 Bool isAcc = opcode == BITS5(0,0,0,1,0);
11082 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11083 if (!ok || (bitQ == 0 && size == X11)) return False;
11084 vassert(size >= 0 && size <= 3);
11085 UInt lanebits = 8 << size;
11086 vassert(shift >= 1 && shift <= lanebits);
11087 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
11088 IRExpr* src = getQReg128(nn);
11089 IRTemp shf = newTempV128();
11090 IRTemp res = newTempV128();
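/* A shift by the full lane width can't be done directly (presumably the
   IR shift ops require the amount to be less than the lane width).  For
   unsigned shifts the result is simply zero; for signed shifts, shifting
   by lanebits-1 (the "nudge") gives the same result, since every result
   bit is then a copy of the sign bit. */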
11091 if (shift == lanebits && isU) {
11092 assign(shf, mkV128(0x0000));
11093 } else {
11094 UInt nudge = 0;
11095 if (shift == lanebits) {
11096 vassert(!isU);
11097 nudge = 1;
11099 assign(shf, binop(op, src, mkU8(shift - nudge)));
11101 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
11102 : mkexpr(shf));
11103 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11104 HChar laneCh = "bhsd"[size];
11105 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11106 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
11107 : (isU ? "ushr" : "sshr");
11108 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11109 nameQReg128(dd), nLanes, laneCh,
11110 nameQReg128(nn), nLanes, laneCh, shift);
11111 return True;
11114 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
11115 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
11116 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
11117 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
11118 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
11119 /* laneTy, shift = case immh:immb of
11120 0001:xxx -> B, SHR:8-xxx
11121 001x:xxx -> H, SHR:16-xxxx
11122 01xx:xxx -> S, SHR:32-xxxxx
11123 1xxx:xxx -> D, SHR:64-xxxxxx
11124 other -> invalid
11126 UInt size = 0;
11127 UInt shift = 0;
11128 Bool isQ = bitQ == 1;
11129 Bool isU = bitU == 1;
11130 Bool isAcc = opcode == BITS5(0,0,1,1,0);
11131 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11132 if (!ok || (bitQ == 0 && size == X11)) return False;
11133 vassert(size >= 0 && size <= 3);
11134 UInt lanebits = 8 << size;
11135 vassert(shift >= 1 && shift <= lanebits);
11136 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
11137 IRExpr* src = getQReg128(nn);
11138 IRTemp imm8 = newTemp(Ity_I8);
11139 assign(imm8, mkU8((UChar)(-shift)));
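/* The rounding-shift ops take a per-lane signed shift amount in which a
   negative value means "shift right (with rounding)"; hence the vector
   of duplicated -shift bytes. */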
11140 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
11141 IRTemp shf = newTempV128();
11142 IRTemp res = newTempV128();
11143 assign(shf, binop(op, src, amt));
11144 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
11145 : mkexpr(shf));
11146 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11147 HChar laneCh = "bhsd"[size];
11148 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11149 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
11150 : (isU ? "urshr" : "srshr");
11151 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11152 nameQReg128(dd), nLanes, laneCh,
11153 nameQReg128(nn), nLanes, laneCh, shift);
11154 return True;
11157 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
11158 /* -------- 1,01000 SRI std7_std7_#imm -------- */
11159 /* laneTy, shift = case immh:immb of
11160 0001:xxx -> B, SHR:8-xxx
11161 001x:xxx -> H, SHR:16-xxxx
11162 01xx:xxx -> S, SHR:32-xxxxx
11163 1xxx:xxx -> D, SHR:64-xxxxxx
11164 other -> invalid
11166 UInt size = 0;
11167 UInt shift = 0;
11168 Bool isQ = bitQ == 1;
11169 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11170 if (!ok || (bitQ == 0 && size == X11)) return False;
11171 vassert(size >= 0 && size <= 3);
11172 UInt lanebits = 8 << size;
11173 vassert(shift >= 1 && shift <= lanebits);
11174 IRExpr* src = getQReg128(nn);
11175 IRTemp res = newTempV128();
11176 if (shift == lanebits) {
11177 assign(res, getQReg128(dd));
11178 } else {
11179 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
11180 IRExpr* nmask = binop(mkVecSHLN(size),
11181 mkV128(0xFFFF), mkU8(lanebits - shift));
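/* nmask has the top |shift| bits of each lane set: those are the bits
   of Vd that SRI preserves, to be ORed over the shifted-in value. */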
11182 IRTemp tmp = newTempV128();
11183 assign(tmp, binop(Iop_OrV128,
11184 mkexpr(res),
11185 binop(Iop_AndV128, getQReg128(dd), nmask)));
11186 res = tmp;
11188 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11189 HChar laneCh = "bhsd"[size];
11190 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11191 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
11192 nameQReg128(dd), nLanes, laneCh,
11193 nameQReg128(nn), nLanes, laneCh, shift);
11194 return True;
11197 if (opcode == BITS5(0,1,0,1,0)) {
11198 /* -------- 0,01010 SHL std7_std7_#imm -------- */
11199 /* -------- 1,01010 SLI std7_std7_#imm -------- */
11200 /* laneTy, shift = case immh:immb of
11201 0001:xxx -> B, xxx
11202 001x:xxx -> H, xxxx
11203 01xx:xxx -> S, xxxxx
11204 1xxx:xxx -> D, xxxxxx
11205 other -> invalid
11207 UInt size = 0;
11208 UInt shift = 0;
11209 Bool isSLI = bitU == 1;
11210 Bool isQ = bitQ == 1;
11211 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11212 if (!ok || (bitQ == 0 && size == X11)) return False;
11213 vassert(size >= 0 && size <= 3);
11214 /* The shift encoding has opposite sign for the leftwards case.
11215 Adjust shift to compensate. */
11216 UInt lanebits = 8 << size;
11217 shift = lanebits - shift;
11218 vassert(shift >= 0 && shift < lanebits);
11219 IROp op = mkVecSHLN(size);
11220 IRExpr* src = getQReg128(nn);
11221 IRTemp res = newTempV128();
11222 if (shift == 0) {
11223 assign(res, src);
11224 } else {
11225 assign(res, binop(op, src, mkU8(shift)));
11226 if (isSLI) {
11227 IRExpr* nmask = binop(mkVecSHRN(size),
11228 mkV128(0xFFFF), mkU8(lanebits - shift));
11229 IRTemp tmp = newTempV128();
11230 assign(tmp, binop(Iop_OrV128,
11231 mkexpr(res),
11232 binop(Iop_AndV128, getQReg128(dd), nmask)));
11233 res = tmp;
11236 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11237 HChar laneCh = "bhsd"[size];
11238 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11239 const HChar* nm = isSLI ? "sli" : "shl";
11240 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11241 nameQReg128(dd), nLanes, laneCh,
11242 nameQReg128(nn), nLanes, laneCh, shift);
11243 return True;
11246 if (opcode == BITS5(0,1,1,1,0)
11247 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
11248 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
11249 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
11250 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
11251 UInt size = 0;
11252 UInt shift = 0;
11253 Bool isQ = bitQ == 1;
11254 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11255 if (!ok || (bitQ == 0 && size == X11)) return False;
11256 vassert(size >= 0 && size <= 3);
11257 /* The shift encoding has opposite sign for the leftwards case.
11258 Adjust shift to compensate. */
11259 UInt lanebits = 8 << size;
11260 shift = lanebits - shift;
11261 vassert(shift >= 0 && shift < lanebits);
11262 const HChar* nm = NULL;
11263 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
11264 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
11265 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
11266 else vassert(0);
11267 IRTemp qDiff1 = IRTemp_INVALID;
11268 IRTemp qDiff2 = IRTemp_INVALID;
11269 IRTemp res = IRTemp_INVALID;
11270 IRTemp src = newTempV128();
11271 assign(src, getQReg128(nn));
11272 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
11273 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11274 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
11275 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
11276 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11277 DIP("%s %s.%s, %s.%s, #%u\n", nm,
11278 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
11279 return True;
11282 if (bitU == 0
11283 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
11284 /* -------- 0,10000 SHRN{,2} #imm -------- */
11285 /* -------- 0,10001 RSHRN{,2} #imm -------- */
11286 /* Narrows, and size is the narrow size. */
11287 UInt size = 0;
11288 UInt shift = 0;
11289 Bool is2 = bitQ == 1;
11290 Bool isR = opcode == BITS5(1,0,0,0,1);
11291 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11292 if (!ok || size == X11) return False;
11293 vassert(shift >= 1);
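/* The rounding variant adds 2^(shift-1) to each wide lane before
   shifting -- e.g. a constant of 4 for shift == 3 -- so that the
   discarded bits are rounded rather than truncated. */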
11294 IRTemp t1 = newTempV128();
11295 IRTemp t2 = newTempV128();
11296 IRTemp t3 = newTempV128();
11297 assign(t1, getQReg128(nn));
11298 assign(t2, isR ? binop(mkVecADD(size+1),
11299 mkexpr(t1),
11300 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
11301 : mkexpr(t1));
11302 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
11303 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
11304 putLO64andZUorPutHI64(is2, dd, t4);
11305 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11306 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11307 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
11308 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
11309 return True;
11312 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
11313 || (bitU == 1
11314 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
11315 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
11316 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
11317 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
11318 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
11319 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
11320 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
11321 UInt size = 0;
11322 UInt shift = 0;
11323 Bool is2 = bitQ == 1;
11324 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11325 if (!ok || size == X11) return False;
11326 vassert(shift >= 1 && shift <= (8 << size));
11327 const HChar* nm = "??";
11328 IROp op = Iop_INVALID;
11329 /* Decide on the name and the operation. */
11330 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
11331 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
11333 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
11334 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
11336 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
11337 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
11339 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
11340 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
11342 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
11343 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
11345 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
11346 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
11348 else vassert(0);
11349 /* Compute the result (Q, shifted value) pair. */
11350 IRTemp src128 = newTempV128();
11351 assign(src128, getQReg128(nn));
11352 IRTemp pair = newTempV128();
11353 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
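/* As used here, these QANDq*NARROW ops return the narrowed lanes in the
   lower 64 bits of the V128 and leave the upper 64 bits nonzero iff any
   lane saturated; the lower half becomes the result and the upper half
   drives the QC update below. */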
11354 /* Update the result reg */
11355 IRTemp res64in128 = newTempV128();
11356 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
11357 putLO64andZUorPutHI64(is2, dd, res64in128);
11358 /* Update the Q flag. */
11359 IRTemp q64q64 = newTempV128();
11360 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
11361 IRTemp z128 = newTempV128();
11362 assign(z128, mkV128(0x0000));
11363 updateQCFLAGwithDifference(q64q64, z128);
11364 /* */
11365 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11366 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11367 DIP("%s %s.%s, %s.%s, #%u\n", nm,
11368 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
11369 return True;
11372 if (opcode == BITS5(1,0,1,0,0)) {
11373 /* -------- 0,10100 SSHLL{,2} #imm -------- */
11374 /* -------- 1,10100 USHLL{,2} #imm -------- */
11375 /* 31 28 22 18 15 9 4
11376 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
11377 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
11378 where Ta,Tb,sh
11379 = case immh of 1xxx -> invalid
11380 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
11381 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
11382 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
11383 0000 -> AdvSIMD modified immediate (???)
11385 Bool isQ = bitQ == 1;
11386 Bool isU = bitU == 1;
11387 UInt immhb = (immh << 3) | immb;
11388 IRTemp src = newTempV128();
11389 IRTemp zero = newTempV128();
11390 IRExpr* res = NULL;
11391 UInt sh = 0;
11392 const HChar* ta = "??";
11393 const HChar* tb = "??";
11394 assign(src, getQReg128(nn));
11395 assign(zero, mkV128(0x0000));
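/* The widening and the left shift are done together: interleaving the
   narrow lanes with zeroes puts each source lane in the top half of a
   wide lane, and the subsequent right shift by (lane-width - sh) --
   arithmetic for SSHLL, logical for USHLL -- sign/zero-extends it while
   leaving it shifted left by sh. */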
11396 if (immh & 8) {
11397 /* invalid; don't assign to res */
11399 else if (immh & 4) {
11400 sh = immhb - 32;
11401 vassert(sh < 32); /* so 32-sh is 1..32 */
11402 ta = "2d";
11403 tb = isQ ? "4s" : "2s";
11404 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
11405 : mk_InterleaveLO32x4(src, zero);
11406 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
11408 else if (immh & 2) {
11409 sh = immhb - 16;
11410 vassert(sh < 16); /* so 16-sh is 1..16 */
11411 ta = "4s";
11412 tb = isQ ? "8h" : "4h";
11413 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
11414 : mk_InterleaveLO16x8(src, zero);
11415 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
11417 else if (immh & 1) {
11418 sh = immhb - 8;
11419 vassert(sh < 8); /* so 8-sh is 1..8 */
11420 ta = "8h";
11421 tb = isQ ? "16b" : "8b";
11422 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
11423 : mk_InterleaveLO8x16(src, zero);
11424 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
11425 } else {
11426 vassert(immh == 0);
11427 /* invalid; don't assign to res */
11429 /* */
11430 if (res) {
11431 putQReg128(dd, res);
11432 DIP("%cshll%s %s.%s, %s.%s, #%u\n",
11433 isU ? 'u' : 's', isQ ? "2" : "",
11434 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
11435 return True;
11437 return False;
11440 if (opcode == BITS5(1,1,1,0,0)) {
11441 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11442 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11443 /* If immh is of the form 00xx, the insn is invalid. */
11444 if (immh < BITS4(0,1,0,0)) return False;
11445 UInt size = 0;
11446 UInt fbits = 0;
11447 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11448 /* The following holds because immh is never zero. */
11449 vassert(ok);
11450 /* The following holds because immh >= 0100. */
11451 vassert(size == X10 || size == X11);
11452 Bool isD = size == X11;
11453 Bool isU = bitU == 1;
11454 Bool isQ = bitQ == 1;
11455 if (isD && !isQ) return False; /* reject .1d case */
11456 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11457 Double scale = two_to_the_minus(fbits);
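/* Fixed-point convert: each lane is converted int -> FP and the result
   is then multiplied by 2^-fbits to account for the implied fraction
   bits. */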
11458 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11459 : IRExpr_Const(IRConst_F32( (Float)scale ));
11460 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11461 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
11462 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
11463 IRType tyF = isD ? Ity_F64 : Ity_F32;
11464 IRType tyI = isD ? Ity_I64 : Ity_I32;
11465 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11466 vassert(nLanes == 2 || nLanes == 4);
11467 for (UInt i = 0; i < nLanes; i++) {
11468 IRTemp src = newTemp(tyI);
11469 IRTemp res = newTemp(tyF);
11470 IRTemp rm = mk_get_IR_rounding_mode();
11471 assign(src, getQRegLane(nn, i, tyI));
11472 assign(res, triop(opMUL, mkexpr(rm),
11473 binop(opCVT, mkexpr(rm), mkexpr(src)),
11474 scaleE));
11475 putQRegLane(dd, i, mkexpr(res));
11477 if (!isQ) {
11478 putQRegLane(dd, 1, mkU64(0));
11480 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11481 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
11482 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11483 return True;
11486 if (opcode == BITS5(1,1,1,1,1)) {
11487 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
11488 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
11489 /* If immh is of the form 00xx, the insn is invalid. */
11490 if (immh < BITS4(0,1,0,0)) return False;
11491 UInt size = 0;
11492 UInt fbits = 0;
11493 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11494 /* The following holds because immh is never zero. */
11495 vassert(ok);
11496 /* The following holds because immh >= 0100. */
11497 vassert(size == X10 || size == X11);
11498 Bool isD = size == X11;
11499 Bool isU = bitU == 1;
11500 Bool isQ = bitQ == 1;
11501 if (isD && !isQ) return False; /* reject .1d case */
11502 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11503 Double scale = two_to_the_plus(fbits);
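/* The inverse of the fixed-point SCVTF/UCVTF case above: scale each
   lane by 2^fbits and then convert FP -> int, rounding towards zero. */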
11504 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11505 : IRExpr_Const(IRConst_F32( (Float)scale ));
11506 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11507 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
11508 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
11509 IRType tyF = isD ? Ity_F64 : Ity_F32;
11510 IRType tyI = isD ? Ity_I64 : Ity_I32;
11511 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11512 vassert(nLanes == 2 || nLanes == 4);
11513 for (UInt i = 0; i < nLanes; i++) {
11514 IRTemp src = newTemp(tyF);
11515 IRTemp res = newTemp(tyI);
11516 IRTemp rm = newTemp(Ity_I32);
11517 assign(src, getQRegLane(nn, i, tyF));
11518 assign(rm, mkU32(Irrm_ZERO));
11519 assign(res, binop(opCVT, mkexpr(rm),
11520 triop(opMUL, mkexpr(rm),
11521 mkexpr(src), scaleE)));
11522 putQRegLane(dd, i, mkexpr(res));
11524 if (!isQ) {
11525 putQRegLane(dd, 1, mkU64(0));
11527 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11528 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
11529 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11530 return True;
11533 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11534 return False;
11535 # undef INSN
11539 static
11540 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
11542 /* 31 30 29 28 23 21 20 15 11 9 4
11543 0 Q U 01110 size 1 m opcode 00 n d
11544 Decode fields: u,opcode
11546 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11547 if (INSN(31,31) != 0
11548 || INSN(28,24) != BITS5(0,1,1,1,0)
11549 || INSN(21,21) != 1
11550 || INSN(11,10) != BITS2(0,0)) {
11551 return False;
11553 UInt bitQ = INSN(30,30);
11554 UInt bitU = INSN(29,29);
11555 UInt size = INSN(23,22);
11556 UInt mm = INSN(20,16);
11557 UInt opcode = INSN(15,12);
11558 UInt nn = INSN(9,5);
11559 UInt dd = INSN(4,0);
11560 vassert(size < 4);
11561 Bool is2 = bitQ == 1;
11563 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
11564 /* -------- 0,0000 SADDL{2} -------- */
11565 /* -------- 1,0000 UADDL{2} -------- */
11566 /* -------- 0,0010 SSUBL{2} -------- */
11567 /* -------- 1,0010 USUBL{2} -------- */
11568 /* Widens, and size refers to the narrow lanes. */
11569 if (size == X11) return False;
11570 vassert(size <= 2);
11571 Bool isU = bitU == 1;
11572 Bool isADD = opcode == BITS4(0,0,0,0);
11573 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11574 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11575 IRTemp res = newTempV128();
11576 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11577 mkexpr(argL), mkexpr(argR)));
11578 putQReg128(dd, mkexpr(res));
11579 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11580 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11581 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
11582 : (isU ? "usubl" : "ssubl");
11583 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11584 nameQReg128(dd), arrWide,
11585 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11586 return True;
11589 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
11590 /* -------- 0,0001 SADDW{2} -------- */
11591 /* -------- 1,0001 UADDW{2} -------- */
11592 /* -------- 0,0011 SSUBW{2} -------- */
11593 /* -------- 1,0011 USUBW{2} -------- */
11594 /* Widens, and size refers to the narrow lanes. */
11595 if (size == X11) return False;
11596 vassert(size <= 2);
11597 Bool isU = bitU == 1;
11598 Bool isADD = opcode == BITS4(0,0,0,1);
11599 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11600 IRTemp res = newTempV128();
11601 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11602 getQReg128(nn), mkexpr(argR)));
11603 putQReg128(dd, mkexpr(res));
11604 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11605 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11606 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
11607 : (isU ? "usubw" : "ssubw");
11608 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11609 nameQReg128(dd), arrWide,
11610 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
11611 return True;
11614 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
11615 /* -------- 0,0100 ADDHN{2} -------- */
11616 /* -------- 1,0100 RADDHN{2} -------- */
11617 /* -------- 0,0110 SUBHN{2} -------- */
11618 /* -------- 1,0110 RSUBHN{2} -------- */
11619 /* Narrows, and size refers to the narrowed lanes. */
11620 if (size == X11) return False;
11621 vassert(size <= 2);
11622 const UInt shift[3] = { 8, 16, 32 };
11623 Bool isADD = opcode == BITS4(0,1,0,0);
11624 Bool isR = bitU == 1;
11625 /* Combined elements in wide lanes */
11626 IRTemp wide = newTempV128();
11627 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11628 getQReg128(nn), getQReg128(mm));
11629 if (isR) {
11630 wideE = binop(mkVecADD(size+1),
11631 wideE,
11632 mkexpr(math_VEC_DUP_IMM(size+1,
11633 1ULL << (shift[size]-1))));
11635 assign(wide, wideE);
11636 /* Top halves of elements, still in wide lanes */
11637 IRTemp shrd = newTempV128();
11638 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
11639 /* Elements now compacted into lower 64 bits */
11640 IRTemp new64 = newTempV128();
11641 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
11642 putLO64andZUorPutHI64(is2, dd, new64);
11643 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11644 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11645 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
11646 : (isR ? "rsubhn" : "subhn");
11647 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11648 nameQReg128(dd), arrNarrow,
11649 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11650 return True;
11653 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
11654 /* -------- 0,0101 SABAL{2} -------- */
11655 /* -------- 1,0101 UABAL{2} -------- */
11656 /* -------- 0,0111 SABDL{2} -------- */
11657 /* -------- 1,0111 UABDL{2} -------- */
11658 /* Widens, and size refers to the narrow lanes. */
11659 if (size == X11) return False;
11660 vassert(size <= 2);
11661 Bool isU = bitU == 1;
11662 Bool isACC = opcode == BITS4(0,1,0,1);
11663 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11664 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11665 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
11666 IRTemp res = newTempV128();
11667 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
11668 : mkexpr(abd));
11669 putQReg128(dd, mkexpr(res));
11670 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11671 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11672 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
11673 : (isU ? "uabdl" : "sabdl");
11674 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11675 nameQReg128(dd), arrWide,
11676 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11677 return True;
11680 if (opcode == BITS4(1,1,0,0)
11681 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
11682 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
11683 /* -------- 1,1100 UMULL{2} -------- */ // 0
11684 /* -------- 0,1000 SMLAL{2} -------- */ // 1
11685 /* -------- 1,1000 UMLAL{2} -------- */ // 1
11686 /* -------- 0,1010 SMLSL{2} -------- */ // 2
11687 /* -------- 1,1010 UMLSL{2} -------- */ // 2
11688 /* Widens, and size refers to the narrow lanes. */
11689 UInt ks = 3;
11690 switch (opcode) {
11691 case BITS4(1,1,0,0): ks = 0; break;
11692 case BITS4(1,0,0,0): ks = 1; break;
11693 case BITS4(1,0,1,0): ks = 2; break;
11694 default: vassert(0);
11696 vassert(ks >= 0 && ks <= 2);
11697 if (size == X11) return False;
11698 vassert(size <= 2);
11699 Bool isU = bitU == 1;
11700 IRTemp vecN = newTempV128();
11701 IRTemp vecM = newTempV128();
11702 IRTemp vecD = newTempV128();
11703 assign(vecN, getQReg128(nn));
11704 assign(vecM, getQReg128(mm));
11705 assign(vecD, getQReg128(dd));
11706 IRTemp res = IRTemp_INVALID;
11707 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
11708 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11709 putQReg128(dd, mkexpr(res));
11710 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11711 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11712 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
11713 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
11714 nameQReg128(dd), arrWide,
11715 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11716 return True;
11719 if (bitU == 0
11720 && (opcode == BITS4(1,1,0,1)
11721 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
11722 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
11723 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
11724 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
11725 /* Widens, and size refers to the narrow lanes. */
11726 UInt ks = 3;
11727 switch (opcode) {
11728 case BITS4(1,1,0,1): ks = 0; break;
11729 case BITS4(1,0,0,1): ks = 1; break;
11730 case BITS4(1,0,1,1): ks = 2; break;
11731 default: vassert(0);
11733 vassert(ks >= 0 && ks <= 2);
11734 if (size == X00 || size == X11) return False;
11735 vassert(size <= 2);
11736 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11737 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11738 newTempsV128_3(&vecN, &vecM, &vecD);
11739 assign(vecN, getQReg128(nn));
11740 assign(vecM, getQReg128(mm));
11741 assign(vecD, getQReg128(dd));
11742 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11743 is2, size, "mas"[ks],
11744 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11745 putQReg128(dd, mkexpr(res));
11746 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11747 updateQCFLAGwithDifference(sat1q, sat1n);
11748 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11749 updateQCFLAGwithDifference(sat2q, sat2n);
11751 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11752 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11753 const HChar* nm = ks == 0 ? "sqdmull"
11754 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11755 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11756 nameQReg128(dd), arrWide,
11757 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11758 return True;
11761 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11762 /* -------- 0,1110 PMULL{2} -------- */
11763 /* Widens, and size refers to the narrow lanes. */
11764 if (size != X00 && size != X11) return False;
11765 IRTemp res = IRTemp_INVALID;
11766 IRExpr* srcN = getQReg128(nn);
11767 IRExpr* srcM = getQReg128(mm);
11768 const HChar* arrNarrow = NULL;
11769 const HChar* arrWide = NULL;
11770 if (size == X00) {
11771 res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11772 srcN, srcM);
11773 arrNarrow = nameArr_Q_SZ(bitQ, size);
11774 arrWide = nameArr_Q_SZ(1, size+1);
11775 } else {
11776 /* The same thing as the X00 case, except we have to call
11777 a helper to do it. */
11778 vassert(size == X11);
11779 res = newTemp(Ity_V128);
11780 IROp slice
11781 = is2 ? Iop_V128HIto64 : Iop_V128to64;
11782 IRExpr** args
11783 = mkIRExprVec_3( IRExpr_VECRET(),
11784 unop(slice, srcN), unop(slice, srcM));
11785 IRDirty* di
11786 = unsafeIRDirty_1_N( res, 0/*regparms*/,
11787 "arm64g_dirtyhelper_PMULLQ",
11788 &arm64g_dirtyhelper_PMULLQ, args);
11789 stmt(IRStmt_Dirty(di));
11790 /* We can't use nameArr_Q_SZ for this because it can't deal with
11791 Q-sized (128 bit) results. Hence do it by hand. */
11792 arrNarrow = bitQ == 0 ? "1d" : "2d";
11793 arrWide = "1q";
11795 putQReg128(dd, mkexpr(res));
11796 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11797 nameQReg128(dd), arrWide,
11798 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11799 return True;
11802 return False;
11803 # undef INSN
11807 static
11808 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11810 /* 31 30 29 28 23 21 20 15 10 9 4
11811 0 Q U 01110 size 1 m opcode 1 n d
11812 Decode fields: u,size,opcode
11814 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11815 if (INSN(31,31) != 0
11816 || INSN(28,24) != BITS5(0,1,1,1,0)
11817 || INSN(21,21) != 1
11818 || INSN(10,10) != 1) {
11819 return False;
11821 UInt bitQ = INSN(30,30);
11822 UInt bitU = INSN(29,29);
11823 UInt size = INSN(23,22);
11824 UInt mm = INSN(20,16);
11825 UInt opcode = INSN(15,11);
11826 UInt nn = INSN(9,5);
11827 UInt dd = INSN(4,0);
11828 vassert(size < 4);
11830 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11831 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11832 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11833 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11834 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11835 if (size == X11) return False;
11836 Bool isADD = opcode == BITS5(0,0,0,0,0);
11837 Bool isU = bitU == 1;
11838 /* Widen both args out, do the math, narrow to final result. */
11839 IRTemp argL = newTempV128();
11840 IRTemp argLhi = IRTemp_INVALID;
11841 IRTemp argLlo = IRTemp_INVALID;
11842 IRTemp argR = newTempV128();
11843 IRTemp argRhi = IRTemp_INVALID;
11844 IRTemp argRlo = IRTemp_INVALID;
11845 IRTemp resHi = newTempV128();
11846 IRTemp resLo = newTempV128();
11847 IRTemp res = IRTemp_INVALID;
11848 assign(argL, getQReg128(nn));
11849 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11850 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
11851 assign(argR, getQReg128(mm));
11852 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11853 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
11854 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11855 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
11856 assign(resHi, binop(opSxR,
11857 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11858 mkU8(1)));
11859 assign(resLo, binop(opSxR,
11860 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11861 mkU8(1)));
11862 res = math_NARROW_LANES ( resHi, resLo, size );
11863 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11864 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
11865 : (isU ? "uhsub" : "shsub");
11866 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11867 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11868 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11869 return True;
11872 if (opcode == BITS5(0,0,0,1,0)) {
11873 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11874 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11875 if (bitQ == 0 && size == X11) return False; // implied 1d case
11876 Bool isU = bitU == 1;
11877 IRTemp argL = newTempV128();
11878 IRTemp argR = newTempV128();
11879 assign(argL, getQReg128(nn));
11880 assign(argR, getQReg128(mm));
11881 IRTemp res = math_RHADD(size, isU, argL, argR);
11882 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11883 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11884 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11885 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11886 return True;
11889 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
11890 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
11891 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
11892 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
11893 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
11894 if (bitQ == 0 && size == X11) return False; // implied 1d case
11895 Bool isADD = opcode == BITS5(0,0,0,0,1);
11896 Bool isU = bitU == 1;
11897 IROp qop = Iop_INVALID;
11898 IROp nop = Iop_INVALID;
11899 if (isADD) {
11900 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
11901 nop = mkVecADD(size);
11902 } else {
11903 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
11904 nop = mkVecSUB(size);
11906 IRTemp argL = newTempV128();
11907 IRTemp argR = newTempV128();
11908 IRTemp qres = newTempV128();
11909 IRTemp nres = newTempV128();
11910 assign(argL, getQReg128(nn));
11911 assign(argR, getQReg128(mm));
11912 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11913 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11914 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11915 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11916 putQReg128(dd, mkexpr(qres));
11917 updateQCFLAGwithDifference(qres, nres);
11918 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
11919 : (isU ? "uqsub" : "sqsub");
11920 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11921 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11922 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11923 return True;
11926 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
11927 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
11928 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
11929 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
11930 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
11931 Bool isORx = (size & 2) == 2;
11932 Bool invert = (size & 1) == 1;
11933 IRTemp res = newTempV128();
11934 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
11935 getQReg128(nn),
11936 invert ? unop(Iop_NotV128, getQReg128(mm))
11937 : getQReg128(mm)));
11938 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11939 const HChar* names[4] = { "and", "bic", "orr", "orn" };
11940 const HChar* ar = bitQ == 1 ? "16b" : "8b";
11941 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
11942 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
11943 return True;
11946 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
11947 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
11948 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
11949 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
11950 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
11951 IRTemp argD = newTempV128();
11952 IRTemp argN = newTempV128();
11953 IRTemp argM = newTempV128();
11954 assign(argD, getQReg128(dd));
11955 assign(argN, getQReg128(nn));
11956 assign(argM, getQReg128(mm));
11957 const IROp opXOR = Iop_XorV128;
11958 const IROp opAND = Iop_AndV128;
11959 const IROp opNOT = Iop_NotV128;
11960 IRTemp res = newTempV128();
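      /* Per bit, with D = old Vd, N = Vn, M = Vm:
            EOR: res = M ^ N
            BSL: res = (D & N) | (~D & M)   (D selects: N where D is 1, else M)
            BIT: res = (M & N) | (~M & D)   (insert N bits where M is 1)
            BIF: res = (~M & N) | (M & D)   (insert N bits where M is 0)
         The cases below use equivalent xor/and formulations of these. */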
11961 switch (size) {
11962 case BITS2(0,0): /* EOR */
11963 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
11964 break;
11965 case BITS2(0,1): /* BSL */
11966 assign(res, binop(opXOR, mkexpr(argM),
11967 binop(opAND,
11968 binop(opXOR, mkexpr(argM), mkexpr(argN)),
11969 mkexpr(argD))));
11970 break;
11971 case BITS2(1,0): /* BIT */
11972 assign(res, binop(opXOR, mkexpr(argD),
11973 binop(opAND,
11974 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11975 mkexpr(argM))));
11976 break;
11977 case BITS2(1,1): /* BIF */
11978 assign(res, binop(opXOR, mkexpr(argD),
11979 binop(opAND,
11980 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11981 unop(opNOT, mkexpr(argM)))));
11982 break;
11983 default:
11984 vassert(0);
11986 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11987 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
11988 const HChar* arr = bitQ == 1 ? "16b" : "8b";
11989 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
11990 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11991 return True;
11994 if (opcode == BITS5(0,0,1,1,0)) {
11995 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
11996 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
11997 if (bitQ == 0 && size == X11) return False; // implied 1d case
11998 Bool isGT = bitU == 0;
11999 IRExpr* argL = getQReg128(nn);
12000 IRExpr* argR = getQReg128(mm);
12001 IRTemp res = newTempV128();
12002 assign(res,
12003 isGT ? binop(mkVecCMPGTS(size), argL, argR)
12004 : binop(mkVecCMPGTU(size), argL, argR));
12005 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12006 const HChar* nm = isGT ? "cmgt" : "cmhi";
12007 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12008 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12009 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12010 return True;
12013 if (opcode == BITS5(0,0,1,1,1)) {
12014 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
12015 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
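      /* x >= y is computed as NOT(y > x), for both the signed and the
         unsigned comparison below. */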
12016 if (bitQ == 0 && size == X11) return False; // implied 1d case
12017 Bool isGE = bitU == 0;
12018 IRExpr* argL = getQReg128(nn);
12019 IRExpr* argR = getQReg128(mm);
12020 IRTemp res = newTempV128();
12021 assign(res,
12022 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
12023 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
12024 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12025 const HChar* nm = isGE ? "cmge" : "cmhs";
12026 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12027 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12028 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12029 return True;
12032 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
12033 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
12034 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
12035 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
12036 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
12037 if (bitQ == 0 && size == X11) return False; // implied 1d case
12038 Bool isU = bitU == 1;
12039 Bool isR = opcode == BITS5(0,1,0,1,0);
12040 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
12041 : (isU ? mkVecSHU(size) : mkVecSHS(size));
12042 IRTemp res = newTempV128();
12043 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12044 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12045 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
12046 : (isU ? "ushl" : "sshl");
12047 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12048 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12049 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12050 return True;
12053 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
12054 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
12055 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
12056 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
12057 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
12058 if (bitQ == 0 && size == X11) return False; // implied 1d case
12059 Bool isU = bitU == 1;
12060 Bool isR = opcode == BITS5(0,1,0,1,1);
12061 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
12062 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
12063 /* This is a bit tricky. If we're only interested in the lowest 64 bits
12064 of the result (viz, bitQ == 0), then we must adjust the operands to
12065 ensure that the upper part of the result, that we don't care about,
12066 doesn't pollute the returned Q value. To do this, zero out the upper
12067 operand halves beforehand. This works because it means, for the
12068 lanes we don't care about, we are shifting zero by zero, which can
12069 never saturate. */
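      /* The 256-bit result is then split: the low 128 bits (resSH) are the
         shifted lanes written back to Vd, while the high 128 bits (resQ) are
         compared against zero below to fold any saturation into QCFLAG. */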
12070 IRTemp res256 = newTemp(Ity_V256);
12071 IRTemp resSH = newTempV128();
12072 IRTemp resQ = newTempV128();
12073 IRTemp zero = newTempV128();
12074 assign(res256, binop(op,
12075 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
12076 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
12077 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
12078 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
12079 assign(zero, mkV128(0x0000));
12080 putQReg128(dd, mkexpr(resSH));
12081 updateQCFLAGwithDifference(resQ, zero);
12082 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
12083 : (isU ? "uqshl" : "sqshl");
12084 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12085 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12086 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12087 return True;
12090 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
12091 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
12092 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
12093 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
12094 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
12095 if (bitQ == 0 && size == X11) return False; // implied 1d case
12096 Bool isU = bitU == 1;
12097 Bool isMAX = (opcode & 1) == 0;
12098 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
12099 : (isU ? mkVecMINU(size) : mkVecMINS(size));
12100 IRTemp t = newTempV128();
12101 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
12102 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
12103 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
12104 : (isU ? "umin" : "smin");
12105 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12106 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12107 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12108 return True;
12111 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
12112 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
12113 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
12114 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
12115 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
12116 if (size == X11) return False; // 1d/2d cases not allowed
12117 Bool isU = bitU == 1;
12118 Bool isACC = opcode == BITS5(0,1,1,1,1);
12119 vassert(size <= 2);
12120 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
12121 IRTemp t2 = newTempV128();
12122 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
12123 : mkexpr(t1));
12124 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12125 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
12126 : (isU ? "uabd" : "sabd");
12127 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12128 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12129 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12130 return True;
12133 if (opcode == BITS5(1,0,0,0,0)) {
12134 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
12135 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
12136 if (bitQ == 0 && size == X11) return False; // implied 1d case
12137 Bool isSUB = bitU == 1;
12138 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
12139 IRTemp t = newTempV128();
12140 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
12141 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
12142 const HChar* nm = isSUB ? "sub" : "add";
12143 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12144 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12145 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12146 return True;
12149 if (opcode == BITS5(1,0,0,0,1)) {
12150 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
12151 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
12152 if (bitQ == 0 && size == X11) return False; // implied 1d case
12153 Bool isEQ = bitU == 1;
12154 IRExpr* argL = getQReg128(nn);
12155 IRExpr* argR = getQReg128(mm);
12156 IRTemp res = newTempV128();
12157 assign(res,
12158 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12159 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
12160 binop(Iop_AndV128, argL, argR),
12161 mkV128(0x0000))));
12162 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12163 const HChar* nm = isEQ ? "cmeq" : "cmtst";
12164 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12165 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12166 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12167 return True;
12170 if (opcode == BITS5(1,0,0,1,0)) {
12171 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
12172 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
12173 if (bitQ == 0 && size == X11) return False; // implied 1d case
12174 Bool isMLS = bitU == 1;
12175 IROp opMUL = mkVecMUL(size);
12176 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
12177 IRTemp res = newTempV128();
12178 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
12179 assign(res, binop(opADDSUB,
12180 getQReg128(dd),
12181 binop(opMUL, getQReg128(nn), getQReg128(mm))));
12182 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12183 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12184 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
12185 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12186 return True;
12188 return False;
12191 if (opcode == BITS5(1,0,0,1,1)) {
12192 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
12193 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
12194 if (bitQ == 0 && size == X11) return False; // implied 1d case
12195 Bool isPMUL = bitU == 1;
12196 const IROp opsPMUL[4]
12197 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
12198 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
12199 IRTemp res = newTempV128();
12200 if (opMUL != Iop_INVALID) {
12201 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
12202 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12203 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12204 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
12205 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12206 return True;
12208 return False;
12211 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
12212 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
12213 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
12214 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
12215 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
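      /* Pairwise: result lane i is op applied to the i-th adjacent pair of
         the concatenation Vn:Vm (Vn supplying the low half).  Gathering the
         even-indexed and the odd-indexed lanes separately and applying op
         once computes all the pairs in parallel. */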
12216 if (size == X11) return False;
12217 Bool isU = bitU == 1;
12218 Bool isMAX = opcode == BITS5(1,0,1,0,0);
12219 IRTemp vN = newTempV128();
12220 IRTemp vM = newTempV128();
12221 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
12222 : (isU ? mkVecMINU(size) : mkVecMINS(size));
12223 assign(vN, getQReg128(nn));
12224 assign(vM, getQReg128(mm));
12225 IRTemp res128 = newTempV128();
12226 assign(res128,
12227 binop(op,
12228 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
12229 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
12230 /* In the half-width case, use CatEL32x4 to extract the half-width
12231 result from the full-width result. */
12232 IRExpr* res
12233 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
12234 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
12235 mkexpr(res128)))
12236 : mkexpr(res128);
12237 putQReg128(dd, res);
12238 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12239 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
12240 : (isU ? "uminp" : "sminp");
12241 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12242 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12243 return True;
12246 if (opcode == BITS5(1,0,1,1,0)) {
12247 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
12248 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
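      /* Per lane, with esize the element width in bits:
            SQDMULH:  res = sat( (2 * argL * argR) >> esize )
            SQRDMULH: res = sat( (2 * argL * argR + (1 << (esize-1))) >> esize )
         math_SQDMULH also produces the values needed to set QCFLAG when the
         doubling multiply saturates. */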
12249 if (size == X00 || size == X11) return False;
12250 Bool isR = bitU == 1;
12251 IRTemp res, sat1q, sat1n, vN, vM;
12252 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12253 newTempsV128_2(&vN, &vM);
12254 assign(vN, getQReg128(nn));
12255 assign(vM, getQReg128(mm));
12256 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12257 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12258 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12259 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12260 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12261 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
12262 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12263 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12264 return True;
12267 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
12268 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
12269 if (bitQ == 0 && size == X11) return False; // implied 1d case
12270 IRTemp vN = newTempV128();
12271 IRTemp vM = newTempV128();
12272 assign(vN, getQReg128(nn));
12273 assign(vM, getQReg128(mm));
12274 IRTemp res128 = newTempV128();
12275 assign(res128,
12276 binop(mkVecADD(size),
12277 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
12278 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
12279 /* In the half-width case, use CatEL32x4 to extract the half-width
12280 result from the full-width result. */
12281 IRExpr* res
12282 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
12283 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
12284 mkexpr(res128)))
12285 : mkexpr(res128);
12286 putQReg128(dd, res);
12287 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12288 DIP("addp %s.%s, %s.%s, %s.%s\n",
12289 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12290 return True;
12293 if (bitU == 0
12294 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12295 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12296 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12297 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12298 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12299 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
12300 Bool isD = (size & 1) == 1;
12301 if (bitQ == 0 && isD) return False; // implied 1d case
12302 Bool isMIN = (size & 2) == 2;
12303 Bool isNM = opcode == BITS5(1,1,0,0,0);
12304 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
12305 IRTemp res = newTempV128();
12306 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
12307 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12308 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12309 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
12310 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12311 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12312 return True;
12315 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
12316 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12317 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12318 Bool isD = (size & 1) == 1;
12319 Bool isSUB = (size & 2) == 2;
12320 if (bitQ == 0 && isD) return False; // implied 1d case
12321 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12322 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12323 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12324 IRTemp rm = mk_get_IR_rounding_mode();
12325 IRTemp t1 = newTempV128();
12326 IRTemp t2 = newTempV128();
12327 // FIXME: double rounding; use FMA primops instead
12328 assign(t1, triop(opMUL,
12329 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12330 assign(t2, triop(isSUB ? opSUB : opADD,
12331 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12332 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12333 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12334 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
12335 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12336 return True;
12339 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
12340 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12341 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12342 Bool isD = (size & 1) == 1;
12343 Bool isSUB = (size & 2) == 2;
12344 if (bitQ == 0 && isD) return False; // implied 1d case
12345 const IROp ops[4]
12346 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
12347 IROp op = ops[size];
12348 IRTemp rm = mk_get_IR_rounding_mode();
12349 IRTemp t1 = newTempV128();
12350 IRTemp t2 = newTempV128();
12351 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12352 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12353 putQReg128(dd, mkexpr(t2));
12354 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12355 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
12356 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12357 return True;
12360 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
12361 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12362 Bool isD = (size & 1) == 1;
12363 if (bitQ == 0 && isD) return False; // implied 1d case
12364 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12365 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12366 IRTemp rm = mk_get_IR_rounding_mode();
12367 IRTemp t1 = newTempV128();
12368 IRTemp t2 = newTempV128();
12369 // FIXME: use Abd primop instead?
12370 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12371 assign(t2, unop(opABS, mkexpr(t1)));
12372 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12373 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12374 DIP("fabd %s.%s, %s.%s, %s.%s\n",
12375 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12376 return True;
12379 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
12380 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12381 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12382 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12383 Bool isD = (size & 1) == 1;
12384 Bool isMULX = bitU == 0;
12385 if (bitQ == 0 && isD) return False; // implied 1d case
12386 IRTemp rm = mk_get_IR_rounding_mode();
12387 IRTemp t1 = newTempV128();
12388 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12389 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12390 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12391 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12392 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
12393 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12394 return True;
12397 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
12398 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12399 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12400 Bool isD = (size & 1) == 1;
12401 if (bitQ == 0 && isD) return False; // implied 1d case
12402 Bool isGE = bitU == 1;
12403 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
12404 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
12405 IRTemp t1 = newTempV128();
12406 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
12407 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
12408 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12409 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12410 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
12411 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12412 return True;
12415 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
12416 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12417 Bool isD = (size & 1) == 1;
12418 if (bitQ == 0 && isD) return False; // implied 1d case
12419 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12420 IRTemp t1 = newTempV128();
12421 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
12422 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12423 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12424 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
12425 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12426 return True;
12429 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
12430 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12431 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12432 Bool isD = (size & 1) == 1;
12433 Bool isGT = (size & 2) == 2;
12434 if (bitQ == 0 && isD) return False; // implied 1d case
12435 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
12436 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
12437 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12438 IRTemp t1 = newTempV128();
12439 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
12440 unop(opABS, getQReg128(nn)))); // swapd
12441 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12442 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12443 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
12444 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12445 return True;
12448 if (bitU == 1
12449 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12450 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12451 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12452 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12453 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12454 /* FMAXNMP, FMINNMP: FIXME -- KLUDGED */
12455 Bool isD = (size & 1) == 1;
12456 if (bitQ == 0 && isD) return False; // implied 1d case
12457 Bool isMIN = (size & 2) == 2;
12458 Bool isNM = opcode == BITS5(1,1,0,0,0);
12459 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
12460 IRTemp srcN = newTempV128();
12461 IRTemp srcM = newTempV128();
12462 IRTemp preL = IRTemp_INVALID;
12463 IRTemp preR = IRTemp_INVALID;
12464 assign(srcN, getQReg128(nn));
12465 assign(srcM, getQReg128(mm));
12466 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12467 srcM, srcN, isD, bitQ);
12468 putQReg128(
12469 dd, math_MAYBE_ZERO_HI64_fromE(
12470 bitQ,
12471 binop(opMXX, mkexpr(preL), mkexpr(preR))));
12472 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12473 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
12474 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12475 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12476 return True;
12479 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
12480 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12481 Bool isD = size == X01;
12482 if (bitQ == 0 && isD) return False; // implied 1d case
12483 IRTemp srcN = newTempV128();
12484 IRTemp srcM = newTempV128();
12485 IRTemp preL = IRTemp_INVALID;
12486 IRTemp preR = IRTemp_INVALID;
12487 assign(srcN, getQReg128(nn));
12488 assign(srcM, getQReg128(mm));
12489 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12490 srcM, srcN, isD, bitQ);
12491 putQReg128(
12492 dd, math_MAYBE_ZERO_HI64_fromE(
12493 bitQ,
12494 triop(mkVecADDF(isD ? 3 : 2),
12495 mkexpr(mk_get_IR_rounding_mode()),
12496 mkexpr(preL), mkexpr(preR))));
12497 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12498 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
12499 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12500 return True;
12503 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
12504 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12505 Bool isD = (size & 1) == 1;
12506 if (bitQ == 0 && isD) return False; // implied 1d case
12507 vassert(size <= 1);
12508 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
12509 IROp op = ops[size];
12510 IRTemp rm = mk_get_IR_rounding_mode();
12511 IRTemp t1 = newTempV128();
12512 IRTemp t2 = newTempV128();
12513 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12514 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12515 putQReg128(dd, mkexpr(t2));
12516 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12517 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
12518 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12519 return True;
12522 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
12523 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12524 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12525 Bool isSQRT = (size & 2) == 2;
12526 Bool isD = (size & 1) == 1;
12527 if (bitQ == 0 && isD) return False; // implied 1d case
12528 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
12529 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
12530 IRTemp res = newTempV128();
12531 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12532 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12533 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12534 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
12535 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12536 return True;
12539 return False;
12540 # undef INSN
12544 static
12545 Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
12547 /* 31 30 29 28 23 21 20 15 14 10 9 4
12548 0 Q U 01110 size 0 m 1 opcode 1 n d
12549 Decode fields: u,size,opcode
12551 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12552 if (INSN(31,31) != 0
12553 || INSN(28,24) != BITS5(0,1,1,1,0)
12554 || INSN(21,21) != 0
12555 || INSN(15,15) != 1
12556 || INSN(10,10) != 1) {
12557 return False;
12559 UInt bitQ = INSN(30,30);
12560 UInt bitU = INSN(29,29);
12561 UInt size = INSN(23,22);
12562 UInt mm = INSN(20,16);
12563 UInt opcode = INSN(14,11);
12564 UInt nn = INSN(9,5);
12565 UInt dd = INSN(4,0);
12566 vassert(size < 4);
12567 vassert(mm < 32 && nn < 32 && dd < 32);
12569 if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
12570 /* -------- 1,xx,0000 SQRDMLAH s and h variants only -------- */
12571 /* -------- 1,xx,0001 SQRDMLSH s and h variants only -------- */
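      /* ARMv8.1 rounding doubling multiply-accumulate: per lane, roughly
            SQRDMLAH: res = sat( Vd + rounded_high_half(2 * Vn * Vm) )
            SQRDMLSH: res = sat( Vd - rounded_high_half(2 * Vn * Vm) )
         math_SQRDMLAH builds both the saturating result and an unsaturated
         reference value; their difference drives QCFLAG below. */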
12572 if (size == X00 || size == X11) return False;
12573 Bool isAdd = opcode == BITS4(0,0,0,0);
12575 IRTemp res, res_nosat, vD, vN, vM;
12576 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
12577 newTempsV128_3(&vD, &vN, &vM);
12578 assign(vD, getQReg128(dd));
12579 assign(vN, getQReg128(nn));
12580 assign(vM, getQReg128(mm));
12582 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
12583 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12584 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
12585 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12587 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12588 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
12589 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12590 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12591 return True;
12594 return False;
12595 # undef INSN
12599 static
12600 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
12602 /* 31 30 29 28 23 21 16 11 9 4
12603 0 Q U 01110 size 10000 opcode 10 n d
12604 Decode fields: U,size,opcode
12606 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12607 if (INSN(31,31) != 0
12608 || INSN(28,24) != BITS5(0,1,1,1,0)
12609 || INSN(21,17) != BITS5(1,0,0,0,0)
12610 || INSN(11,10) != BITS2(1,0)) {
12611 return False;
12613 UInt bitQ = INSN(30,30);
12614 UInt bitU = INSN(29,29);
12615 UInt size = INSN(23,22);
12616 UInt opcode = INSN(16,12);
12617 UInt nn = INSN(9,5);
12618 UInt dd = INSN(4,0);
12619 vassert(size < 4);
12621 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
12622 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
12623 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
12624 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
12625 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
12626 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
12627 vassert(size <= 2);
12628 IRTemp res = newTempV128();
12629 assign(res, unop(iops[size], getQReg128(nn)));
12630 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12631 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12632 DIP("%s %s.%s, %s.%s\n", "rev64",
12633 nameQReg128(dd), arr, nameQReg128(nn), arr);
12634 return True;
12637 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
12638 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
12639 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
12640 Bool isH = size == X01;
12641 IRTemp res = newTempV128();
12642 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
12643 assign(res, unop(iop, getQReg128(nn)));
12644 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12645 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12646 DIP("%s %s.%s, %s.%s\n", "rev32",
12647 nameQReg128(dd), arr, nameQReg128(nn), arr);
12648 return True;
12651 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
12652 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
12653 IRTemp res = newTempV128();
12654 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
12655 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12656 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12657 DIP("%s %s.%s, %s.%s\n", "rev16",
12658 nameQReg128(dd), arr, nameQReg128(nn), arr);
12659 return True;
12662 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
12663 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
12664 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
12665 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
12666 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
12667 /* Widens, and size refers to the narrow size. */
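      /* Per wide result lane: res = ext(src[2*i]) + ext(src[2*i+1]), where
         ext is zero- or sign-extension according to U; the xADALP forms then
         accumulate that sum into the existing Vd lane. */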
12668 if (size == X11) return False; // no 1d or 2d cases
12669 Bool isU = bitU == 1;
12670 Bool isACC = opcode == BITS5(0,0,1,1,0);
12671 IRTemp src = newTempV128();
12672 IRTemp sum = newTempV128();
12673 IRTemp res = newTempV128();
12674 assign(src, getQReg128(nn));
12675 assign(sum,
12676 binop(mkVecADD(size+1),
12677 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12678 isU, True/*fromOdd*/, size, mkexpr(src))),
12679 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12680 isU, False/*!fromOdd*/, size, mkexpr(src)))));
12681 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
12682 : mkexpr(sum));
12683 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12684 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12685 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
12686 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
12687 : (isU ? "uaddlp" : "saddlp"),
12688 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12689 return True;
12692 if (opcode == BITS5(0,0,0,1,1)) {
12693 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
12694 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
12695 if (bitQ == 0 && size == X11) return False; // implied 1d case
12696 Bool isUSQADD = bitU == 1;
12697 /* This is switched (in the US vs SU sense) deliberately.
12698 SUQADD corresponds to the ExtUSsatSS variants and
12699 USQADD corresponds to the ExtSUsatUU variants.
12700 See libvex_ir for more details. */
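      /* Architecturally: SUQADD adds the unsigned elements of Vn to the signed
         elements of Vd with signed saturation; USQADD adds the signed elements
         of Vn to the unsigned elements of Vd with unsigned saturation. */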
12701 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
12702 : mkVecQADDEXTUSSATSS(size);
12703 IROp nop = mkVecADD(size);
12704 IRTemp argL = newTempV128();
12705 IRTemp argR = newTempV128();
12706 IRTemp qres = newTempV128();
12707 IRTemp nres = newTempV128();
12708 /* Because the two arguments to the addition are implicitly
12709 extended differently (one signedly, the other unsignedly) it is
12710 important to present them to the primop in the correct order. */
12711 assign(argL, getQReg128(nn));
12712 assign(argR, getQReg128(dd));
12713 assign(qres, math_MAYBE_ZERO_HI64_fromE(
12714 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
12715 assign(nres, math_MAYBE_ZERO_HI64_fromE(
12716 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
12717 putQReg128(dd, mkexpr(qres));
12718 updateQCFLAGwithDifference(qres, nres);
12719 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12720 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
12721 nameQReg128(dd), arr, nameQReg128(nn), arr);
12722 return True;
12725 if (opcode == BITS5(0,0,1,0,0)) {
12726 /* -------- 0,xx,00100: CLS std6_std6 -------- */
12727 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
12728 if (size == X11) return False; // no 1d or 2d cases
12729 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
12730 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
12731 Bool isCLZ = bitU == 1;
12732 IRTemp res = newTempV128();
12733 vassert(size <= 2);
12734 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
12735 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12736 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12737 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
12738 nameQReg128(dd), arr, nameQReg128(nn), arr);
12739 return True;
12742 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
12743 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
12744 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
12745 IRTemp res = newTempV128();
12746 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
12747 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12748 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12749 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
12750 nameQReg128(dd), arr, nameQReg128(nn), arr);
12751 return True;
12754 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
12755 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
12756 IRTemp res = newTempV128();
12757 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
12758 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12759 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12760 DIP("%s %s.%s, %s.%s\n", "rbit",
12761 nameQReg128(dd), arr, nameQReg128(nn), arr);
12762 return True;
12765 if (opcode == BITS5(0,0,1,1,1)) {
12766 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
12767 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
12768 if (bitQ == 0 && size == X11) return False; // implied 1d case
12769 Bool isNEG = bitU == 1;
12770 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
12771 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
12772 getQReg128(nn), size );
12773 IRTemp qres = newTempV128(), nres = newTempV128();
12774 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
12775 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
12776 putQReg128(dd, mkexpr(qres));
12777 updateQCFLAGwithDifference(qres, nres);
12778 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12779 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
12780 nameQReg128(dd), arr, nameQReg128(nn), arr);
12781 return True;
12784 if (opcode == BITS5(0,1,0,0,0)) {
12785 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
12786 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
12787 if (bitQ == 0 && size == X11) return False; // implied 1d case
12788 Bool isGT = bitU == 0;
12789 IRExpr* argL = getQReg128(nn);
12790 IRExpr* argR = mkV128(0x0000);
12791 IRTemp res = newTempV128();
12792 IROp opGTS = mkVecCMPGTS(size);
12793 assign(res, isGT ? binop(opGTS, argL, argR)
12794 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
12795 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12796 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12797 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
12798 nameQReg128(dd), arr, nameQReg128(nn), arr);
12799 return True;
12802 if (opcode == BITS5(0,1,0,0,1)) {
12803 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
12804 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
12805 if (bitQ == 0 && size == X11) return False; // implied 1d case
12806 Bool isEQ = bitU == 0;
12807 IRExpr* argL = getQReg128(nn);
12808 IRExpr* argR = mkV128(0x0000);
12809 IRTemp res = newTempV128();
12810 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12811 : unop(Iop_NotV128,
12812 binop(mkVecCMPGTS(size), argL, argR)));
12813 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12814 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12815 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
12816 nameQReg128(dd), arr, nameQReg128(nn), arr);
12817 return True;
12820 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
12821 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
12822 if (bitQ == 0 && size == X11) return False; // implied 1d case
12823 IRExpr* argL = getQReg128(nn);
12824 IRExpr* argR = mkV128(0x0000);
12825 IRTemp res = newTempV128();
12826 assign(res, binop(mkVecCMPGTS(size), argR, argL));
12827 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12828 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12829 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
12830 nameQReg128(dd), arr, nameQReg128(nn), arr);
12831 return True;
12834 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
12835 /* -------- 0,xx,01011: ABS std7_std7 -------- */
12836 if (bitQ == 0 && size == X11) return False; // implied 1d case
12837 IRTemp res = newTempV128();
12838 assign(res, unop(mkVecABS(size), getQReg128(nn)));
12839 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12840 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12841 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12842 return True;
12845 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
12846 /* -------- 1,xx,01011: NEG std7_std7 -------- */
12847 if (bitQ == 0 && size == X11) return False; // implied 1d case
12848 IRTemp res = newTempV128();
12849 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
12850 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12851 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12852 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12853 return True;
12856 UInt ix = 0; /*INVALID*/
12857 if (size >= X10) {
12858 switch (opcode) {
12859 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
12860 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
12861 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
12862 default: break;
12865 if (ix > 0) {
12866 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
12867 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
12868 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
12869 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
12870 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
12871 if (bitQ == 0 && size == X11) return False; // implied 1d case
12872 Bool isD = size == X11;
12873 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
12874 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
12875 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12876 IROp opCmp = Iop_INVALID;
12877 Bool swap = False;
12878 const HChar* nm = "??";
12879 switch (ix) {
12880 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
12881 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
12882 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
12883 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
12884 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
12885 default: vassert(0);
12887 IRExpr* zero = mkV128(0x0000);
12888 IRTemp res = newTempV128();
12889 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
12890 : binop(opCmp, getQReg128(nn), zero));
12891 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12892 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12893 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
12894 nameQReg128(dd), arr, nameQReg128(nn), arr);
12895 return True;
12898 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
12899 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
12900 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
12901 if (bitQ == 0 && size == X11) return False; // implied 1d case
12902 Bool isFNEG = bitU == 1;
12903 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
12904 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
12905 IRTemp res = newTempV128();
12906 assign(res, unop(op, getQReg128(nn)));
12907 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12908 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12909 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
12910 nameQReg128(dd), arr, nameQReg128(nn), arr);
12911 return True;
12914 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12915 /* -------- 0,xx,10010: XTN{,2} -------- */
12916 if (size == X11) return False;
12917 vassert(size < 3);
12918 Bool is2 = bitQ == 1;
12919 IROp opN = mkVecNARROWUN(size);
12920 IRTemp resN = newTempV128();
12921 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
12922 putLO64andZUorPutHI64(is2, dd, resN);
12923 const HChar* nm = "xtn";
12924 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12925 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12926 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12927 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12928 return True;
12931 if (opcode == BITS5(1,0,1,0,0)
12932 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
12933 /* -------- 0,xx,10100: SQXTN{,2} -------- */
12934 /* -------- 1,xx,10100: UQXTN{,2} -------- */
12935 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
12936 if (size == X11) return False;
12937 vassert(size < 3);
12938 Bool is2 = bitQ == 1;
12939 IROp opN = Iop_INVALID;
12940 Bool zWiden = True;
12941 const HChar* nm = "??";
12942 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
12943 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
12945 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
12946 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
12948 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12949 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
12951 else vassert(0);
12952 IRTemp src = newTempV128();
12953 assign(src, getQReg128(nn));
12954 IRTemp resN = newTempV128();
12955 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
12956 putLO64andZUorPutHI64(is2, dd, resN);
12957 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
12958 size, mkexpr(resN));
12959 updateQCFLAGwithDifference(src, resW);
12960 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12961 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12962 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12963 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12964 return True;
12967 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12968 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
12969 /* Widens, and size is the narrow size. */
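      /* The shift amount is fixed at the (narrow) element width, so each wide
         result lane is just the source element moved into its top half with
         zeroes below; interleaving the source with itself and then shifting
         left by the element width produces exactly that. */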
12970 if (size == X11) return False;
12971 Bool is2 = bitQ == 1;
12972 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
12973 IROp opSHL = mkVecSHLN(size+1);
12974 IRTemp src = newTempV128();
12975 IRTemp res = newTempV128();
12976 assign(src, getQReg128(nn));
12977 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
12978 mkU8(8 << size)));
12979 putQReg128(dd, mkexpr(res));
12980 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12981 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12982 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
12983 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
12984 return True;
12987 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
12988 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
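      /* FCVTN narrows each source lane and writes the results to the low half
         of Vd, zeroing the high half; FCVTN2 (Q==1) writes them to the high
         half and leaves the low half of Vd unchanged. */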
12989 UInt nLanes = size == X00 ? 4 : 2;
12990 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
12991 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
12992 IRTemp rm = mk_get_IR_rounding_mode();
12993 IRTemp src[nLanes];
12994 for (UInt i = 0; i < nLanes; i++) {
12995 src[i] = newTemp(srcTy);
12996 assign(src[i], getQRegLane(nn, i, srcTy));
12998 for (UInt i = 0; i < nLanes; i++) {
12999 putQRegLane(dd, nLanes * bitQ + i,
13000 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
13002 if (bitQ == 0) {
13003 putQRegLane(dd, 1, mkU64(0));
13005 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
13006 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
13007 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
13008 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13009 return True;
13012 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
13013 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
13014 /* Using Irrm_NEAREST here isn't right. "Round to odd" forces the result's
13015 LSB to 1 when the conversion is inexact, avoiding later double rounding. */
13016 IRType srcTy = Ity_F64;
13017 IROp opCvt = Iop_F64toF32;
13018 IRTemp src[2];
13019 for (UInt i = 0; i < 2; i++) {
13020 src[i] = newTemp(srcTy);
13021 assign(src[i], getQRegLane(nn, i, srcTy));
13023 for (UInt i = 0; i < 2; i++) {
13024 putQRegLane(dd, 2 * bitQ + i,
13025 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
13027 if (bitQ == 0) {
13028 putQRegLane(dd, 1, mkU64(0));
13030 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
13031 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
13032 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
13033 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13034 return True;
13037 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
13038 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
13039 UInt nLanes = size == X00 ? 4 : 2;
13040 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
13041 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
13042 IRTemp src[nLanes];
13043 for (UInt i = 0; i < nLanes; i++) {
13044 src[i] = newTemp(srcTy);
13045 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
13047 for (UInt i = 0; i < nLanes; i++) {
13048 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
13050 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
13051 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
13052 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
13053 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
13054 return True;
13057 ix = 0;
13058 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
13059 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
13060 // = 1 + bitU[0]:size[1]:opcode[0]
13061 vassert(ix >= 1 && ix <= 8);
13062 if (ix == 7) ix = 0;
13064 if (ix > 0) {
13065 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
13066 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
13067 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
13068 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
13069 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
13070 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
13071 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
13072 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
13073 /* rm plan:
13074 FRINTN: tieeven -- !! FIXME KLUDGED !!
13075 FRINTM: -inf
13076 FRINTP: +inf
13077 FRINTZ: zero
13078 FRINTA: tieaway -- !! FIXME KLUDGED !!
13079 FRINTX: per FPCR + "exact = TRUE"
13080 FRINTI: per FPCR
13082 Bool isD = (size & 1) == 1;
13083 if (bitQ == 0 && isD) return False; // implied 1d case
13085 IRTemp irrmRM = mk_get_IR_rounding_mode();
13087 UChar ch = '?';
13088 IRTemp irrm = newTemp(Ity_I32);
13089 switch (ix) {
13090 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
13091 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
13092 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
13093 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
13094 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13095 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
13096 // FRINTX ("round to integral exact") rounds per FPCR like FRINTI, but also
13097 // raises Inexact when the result differs; that side effect isn't modelled.
13098 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
13099 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
13100 default: vassert(0);
13103 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
13104 if (isD) {
13105 for (UInt i = 0; i < 2; i++) {
13106 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
13107 getQRegLane(nn, i, Ity_F64)));
13109 } else {
13110 UInt n = bitQ==1 ? 4 : 2;
13111 for (UInt i = 0; i < n; i++) {
13112 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
13113 getQRegLane(nn, i, Ity_F32)));
13115 if (bitQ == 0)
13116 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
13118 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13119 DIP("frint%c %s.%s, %s.%s\n", ch,
13120 nameQReg128(dd), arr, nameQReg128(nn), arr);
13121 return True;
13124 ix = 0; /*INVALID*/
13125 switch (opcode) {
13126 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
13127 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
13128 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
13129 default: break;
13131 if (ix > 0) {
13132 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13133 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13134 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13135 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13136 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13137 /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13138 /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13139 /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13140 /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13141 /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13142 Bool isD = (size & 1) == 1;
13143 if (bitQ == 0 && isD) return False; // implied 1d case
13145 IRRoundingMode irrm = 8; /*impossible*/
13146 HChar ch = '?';
13147 switch (ix) {
13148 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
13149 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
13150 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
13151 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
13152 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
13153 default: vassert(0);
13155 IROp cvt = Iop_INVALID;
13156 if (bitU == 1) {
13157 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
13158 } else {
13159 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
13161 if (isD) {
13162 for (UInt i = 0; i < 2; i++) {
13163 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
13164 getQRegLane(nn, i, Ity_F64)));
13166 } else {
13167 UInt n = bitQ==1 ? 4 : 2;
13168 for (UInt i = 0; i < n; i++) {
13169 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
13170 getQRegLane(nn, i, Ity_F32)));
13172 if (bitQ == 0)
13173 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
13175 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13176 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
13177 nameQReg128(dd), arr, nameQReg128(nn), arr);
13178 return True;
13181 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
13182 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
13183 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
13184 Bool isREC = bitU == 0;
13185 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
13186 IRTemp res = newTempV128();
13187 assign(res, unop(op, getQReg128(nn)));
13188 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13189 const HChar* nm = isREC ? "urecpe" : "ursqrte";
13190 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13191 DIP("%s %s.%s, %s.%s\n", nm,
13192 nameQReg128(dd), arr, nameQReg128(nn), arr);
13193 return True;
13196 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
13197 /* -------- 0,0x,11101: SCVTF -------- */
13198 /* -------- 1,0x,11101: UCVTF -------- */
13199 /* 31 28 22 21 15 9 4
13200 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
13201 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
13202 with laneage:
13203 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
13205 Bool isQ = bitQ == 1;
13206 Bool isU = bitU == 1;
13207 Bool isF64 = (size & 1) == 1;
13208 if (isQ || !isF64) {
13209 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
13210 UInt nLanes = 0;
13211 Bool zeroHI = False;
13212 const HChar* arrSpec = NULL;
13213 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
13214 isQ, isF64 );
13215 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
13216 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
13217 IRTemp rm = mk_get_IR_rounding_mode();
13218 UInt i;
13219 vassert(ok); /* the 'if' above should ensure this */
13220 for (i = 0; i < nLanes; i++) {
13221 putQRegLane(dd, i,
13222 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
13224 if (zeroHI) {
13225 putQRegLane(dd, 1, mkU64(0));
13227 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
13228 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
13229 return True;
13231 /* else fall through */
13234 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
13235 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
13236 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
13237 Bool isSQRT = bitU == 1;
13238 Bool isD = (size & 1) == 1;
13239 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
13240 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
13241 if (bitQ == 0 && isD) return False; // implied 1d case
13242 IRTemp resV = newTempV128();
13243 assign(resV, unop(op, getQReg128(nn)));
13244 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
13245 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13246 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
13247 nameQReg128(dd), arr, nameQReg128(nn), arr);
13248 return True;
13251 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
13252 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
13253 Bool isD = (size & 1) == 1;
13254 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
13255 if (bitQ == 0 && isD) return False; // implied 1d case
13256 IRTemp resV = newTempV128();
13257 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
13258 getQReg128(nn)));
13259 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
13260 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13261 DIP("%s %s.%s, %s.%s\n", "fsqrt",
13262 nameQReg128(dd), arr, nameQReg128(nn), arr);
13263 return True;
13266 return False;
13267 # undef INSN
13271 static
13272 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
13274 /* 31 28 23 21 20 19 15 11 9 4
13275 0 Q U 01111 size L M m opcode H 0 n d
13276 Decode fields are: u,size,opcode
13277 M is really part of the mm register number. Individual
13278 cases need to inspect L and H though.
13280 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13281 if (INSN(31,31) != 0
13282 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
13283 return False;
13285 UInt bitQ = INSN(30,30);
13286 UInt bitU = INSN(29,29);
13287 UInt size = INSN(23,22);
13288 UInt bitL = INSN(21,21);
13289 UInt bitM = INSN(20,20);
13290 UInt mmLO4 = INSN(19,16);
13291 UInt opcode = INSN(15,12);
13292 UInt bitH = INSN(11,11);
13293 UInt nn = INSN(9,5);
13294 UInt dd = INSN(4,0);
13295 vassert(size < 4);
13296 vassert(bitH < 2 && bitM < 2 && bitL < 2);
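/* In the by-element cases below, the lane index is assembled from H, L
   and M according to the element size: H:L:M for 16-bit lanes (so Vm is
   limited to v0..v15), H:L for 32-bit lanes, and H alone for 64-bit lanes
   (L must then be 0); in the 32- and 64-bit cases M is instead the top
   bit of the Vm register number.  See the individual cases. */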
13298 if (bitU == 0 && size >= X10
13299 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
13300 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13301 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13302 if (bitQ == 0 && size == X11) return False; // implied 1d case
13303 Bool isD = (size & 1) == 1;
13304 Bool isSUB = opcode == BITS4(0,1,0,1);
13305 UInt index;
13306 if (!isD) index = (bitH << 1) | bitL;
13307 else if (isD && bitL == 0) index = bitH;
13308 else return False; // sz:L == x11 => unallocated encoding
13309 vassert(index < (isD ? 2 : 4));
13310 IRType ity = isD ? Ity_F64 : Ity_F32;
13311 IRTemp elem = newTemp(ity);
13312 UInt mm = (bitM << 4) | mmLO4;
13313 assign(elem, getQRegLane(mm, index, ity));
13314 IRTemp dupd = math_DUP_TO_V128(elem, ity);
13315 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
13316 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
13317 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
13318 IRTemp rm = mk_get_IR_rounding_mode();
13319 IRTemp t1 = newTempV128();
13320 IRTemp t2 = newTempV128();
13321 // FIXME: double rounding; use FMA primops instead
13322 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
13323 assign(t2, triop(isSUB ? opSUB : opADD,
13324 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
13325 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
13326 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13327 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
13328 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
13329 isD ? 'd' : 's', index);
13330 return True;
13333 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
13334 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13335 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13336 if (bitQ == 0 && size == X11) return False; // implied 1d case
13337 Bool isD = (size & 1) == 1;
13338 Bool isMULX = bitU == 1;
13339 UInt index;
13340 if (!isD) index = (bitH << 1) | bitL;
13341 else if (isD && bitL == 0) index = bitH;
13342 else return False; // sz:L == x11 => unallocated encoding
13343 vassert(index < (isD ? 2 : 4));
13344 IRType ity = isD ? Ity_F64 : Ity_F32;
13345 IRTemp elem = newTemp(ity);
13346 UInt mm = (bitM << 4) | mmLO4;
13347 assign(elem, getQRegLane(mm, index, ity));
13348 IRTemp dupd = math_DUP_TO_V128(elem, ity);
13349 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
13350 IRTemp res = newTempV128();
13351 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
13352 mkexpr(mk_get_IR_rounding_mode()),
13353 getQReg128(nn), mkexpr(dupd)));
13354 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13355 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13356 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
13357 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
13358 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
13359 return True;
13362 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
13363 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
13364 /* -------- 1,xx,0000 MLA s/h variants only -------- */
13365 /* -------- 1,xx,0100 MLS s/h variants only -------- */
13366 /* -------- 0,xx,1000 MUL s/h variants only -------- */
13367 Bool isMLA = opcode == BITS4(0,0,0,0);
13368 Bool isMLS = opcode == BITS4(0,1,0,0);
13369 UInt mm = 32; // invalid
13370 UInt ix = 16; // invalid
13371 switch (size) {
13372 case X00:
13373 return False; // b case is not allowed
13374 case X01:
13375 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13376 case X10:
13377 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13378 case X11:
13379 return False; // d case is not allowed
13380 default:
13381 vassert(0);
13383 vassert(mm < 32 && ix < 16);
13384 IROp opMUL = mkVecMUL(size);
13385 IROp opADD = mkVecADD(size);
13386 IROp opSUB = mkVecSUB(size);
13387 HChar ch = size == X01 ? 'h' : 's';
13388 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13389 IRTemp vecD = newTempV128();
13390 IRTemp vecN = newTempV128();
13391 IRTemp res = newTempV128();
13392 assign(vecD, getQReg128(dd));
13393 assign(vecN, getQReg128(nn));
13394 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
13395 if (isMLA || isMLS) {
13396 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
13397 } else {
13398 assign(res, prod);
13400 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13401 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13402 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
13403 : (isMLS ? "mls" : "mul"),
13404 nameQReg128(dd), arr,
13405 nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13406 return True;
13409 if (opcode == BITS4(1,0,1,0)
13410 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
13411 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
13412 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
13413 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
13414 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
13415 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
13416 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
13417 /* Widens, and size refers to the narrowed lanes. */
13418 UInt ks = 3;
13419 switch (opcode) {
13420 case BITS4(1,0,1,0): ks = 0; break;
13421 case BITS4(0,0,1,0): ks = 1; break;
13422 case BITS4(0,1,1,0): ks = 2; break;
13423 default: vassert(0);
13425 vassert(ks >= 0 && ks <= 2);
13426 Bool isU = bitU == 1;
13427 Bool is2 = bitQ == 1;
13428 UInt mm = 32; // invalid
13429 UInt ix = 16; // invalid
13430 switch (size) {
13431 case X00:
13432 return False; // h_b_b[] case is not allowed
13433 case X01:
13434 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13435 case X10:
13436 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13437 case X11:
13438 return False; // q_d_d[] case is not allowed
13439 default:
13440 vassert(0);
13442 vassert(mm < 32 && ix < 16);
13443 IRTemp vecN = newTempV128();
13444 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13445 IRTemp vecD = newTempV128();
13446 assign(vecN, getQReg128(nn));
13447 assign(vecD, getQReg128(dd));
13448 IRTemp res = IRTemp_INVALID;
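/* "mas"[ks] passes a single character -- 'm', 'a' or 's' -- to
   math_MULL_ACC, which presumably selects the plain multiply-long,
   accumulating (mlal) or subtracting (mlsl) form, matching the ks
   decoding above. */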
13449 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
13450 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13451 putQReg128(dd, mkexpr(res));
13452 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
13453 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13454 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13455 HChar ch = size == X01 ? 'h' : 's';
13456 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13457 isU ? 'u' : 's', nm, is2 ? "2" : "",
13458 nameQReg128(dd), arrWide,
13459 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13460 return True;
13463 if (bitU == 0
13464 && (opcode == BITS4(1,0,1,1)
13465 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
13466 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
13467 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
13468 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
13469 /* Widens, and size refers to the narrowed lanes. */
13470 UInt ks = 3;
13471 switch (opcode) {
13472 case BITS4(1,0,1,1): ks = 0; break;
13473 case BITS4(0,0,1,1): ks = 1; break;
13474 case BITS4(0,1,1,1): ks = 2; break;
13475 default: vassert(0);
13477 vassert(ks >= 0 && ks <= 2);
13478 Bool is2 = bitQ == 1;
13479 UInt mm = 32; // invalid
13480 UInt ix = 16; // invalid
13481 switch (size) {
13482 case X00:
13483 return False; // h_b_b[] case is not allowed
13484 case X01:
13485 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13486 case X10:
13487 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13488 case X11:
13489 return False; // q_d_d[] case is not allowed
13490 default:
13491 vassert(0);
13493 vassert(mm < 32 && ix < 16);
13494 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
13495 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
13496 newTempsV128_2(&vecN, &vecD);
13497 assign(vecN, getQReg128(nn));
13498 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13499 assign(vecD, getQReg128(dd));
13500 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
13501 is2, size, "mas"[ks],
13502 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13503 putQReg128(dd, mkexpr(res));
13504 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
13505 updateQCFLAGwithDifference(sat1q, sat1n);
13506 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
13507 updateQCFLAGwithDifference(sat2q, sat2n);
13509 const HChar* nm = ks == 0 ? "sqdmull"
13510 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
13511 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13512 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13513 HChar ch = size == X01 ? 'h' : 's';
13514 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13515 nm, is2 ? "2" : "",
13516 nameQReg128(dd), arrWide,
13517 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13518 return True;
13521 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
13522 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
13523 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
13524 UInt mm = 32; // invalid
13525 UInt ix = 16; // invalid
13526 switch (size) {
13527 case X00:
13528 return False; // b case is not allowed
13529 case X01:
13530 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13531 case X10:
13532 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13533 case X11:
13534 return False; // d case is not allowed
13535 default:
13536 vassert(0);
13538 vassert(mm < 32 && ix < 16);
13539 Bool isR = opcode == BITS4(1,1,0,1);
13540 IRTemp res, sat1q, sat1n, vN, vM;
13541 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
13542 vN = newTempV128();
13543 assign(vN, getQReg128(nn));
13544 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13545 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
13546 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13547 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
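/* With Q == 0 only the lower 64 bits of the result hold real lanes, so
   opZHI zeroes the upper halves before the saturation comparison and the
   QC flag is set only from lanes that actually exist. */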
13548 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
13549 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
13550 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13551 HChar ch = size == X01 ? 'h' : 's';
13552 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13553 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13554 return True;
13557 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
13558 /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
13559 /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
13560 UInt mm = 32; // invalid
13561 UInt ix = 16; // invalid
13562 switch (size) {
13563 case X00:
13564 return False; // b case is not allowed
13565 case X01: // h
13566 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13567 case X10: // s
13568 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13569 case X11:
13570 return False; // d case is not allowed
13571 default:
13572 vassert(0);
13574 vassert(mm < 32 && ix < 16);
13576 IRTemp res, res_nosat, vD, vN, vM;
13577 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
13578 newTempsV128_2(&vD, &vN);
13579 assign(vD, getQReg128(dd));
13580 assign(vN, getQReg128(nn));
13582 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13583 Bool isAdd = opcode == BITS4(1,1,0,1);
13584 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
13585 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13586 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
13587 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13589 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13590 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
13591 HChar ch = size == X01 ? 'h' : 's';
13592 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13593 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13594 return True;
13597 return False;
13598 # undef INSN
13602 static
13603 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
13605 /* 31 23 21 16 11 9 4
13606 0100 1110 size 10100 opcode 10 n d
13607 Decode fields are: size,opcode
13608 Size is always 00 in ARMv8, it appears.
13610 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13611 if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
13612 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13613 return False;
13615 UInt size = INSN(23,22);
13616 UInt opcode = INSN(16,12);
13617 UInt nn = INSN(9,5);
13618 UInt dd = INSN(4,0);
13620 if (size == BITS2(0,0)
13621 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
13622 /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
13623 /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
13624 Bool isD = opcode == BITS5(0,0,1,0,1);
13625 IRTemp op1 = newTemp(Ity_V128);
13626 IRTemp op2 = newTemp(Ity_V128);
13627 IRTemp xord = newTemp(Ity_V128);
13628 IRTemp res = newTemp(Ity_V128);
13629 void* helper = isD ? &arm64g_dirtyhelper_AESD
13630 : &arm64g_dirtyhelper_AESE;
13631 const HChar* hname = isD ? "arm64g_dirtyhelper_AESD"
13632 : "arm64g_dirtyhelper_AESE";
13633 assign(op1, getQReg128(dd));
13634 assign(op2, getQReg128(nn));
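/* AESE/AESD begin with AddRoundKey, which is simply Vd XOR Vn; that XOR
   is done inline here, and the remaining byte-substitution and row-shift
   work is left to the dirty helper, which receives the XORed state as two
   64-bit halves and returns the 128-bit result via VECRET. */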
13635 assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
13636 IRDirty* di
13637 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13638 mkIRExprVec_3(
13639 IRExpr_VECRET(),
13640 unop(Iop_V128HIto64, mkexpr(xord)),
13641 unop(Iop_V128to64, mkexpr(xord)) ) );
13642 stmt(IRStmt_Dirty(di));
13643 putQReg128(dd, mkexpr(res));
13644 DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
13645 nameQReg128(dd), nameQReg128(nn));
13646 return True;
13649 if (size == BITS2(0,0)
13650 && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
13651 /* -------- 00,00110: AESMC Vd.16b, Vn.16b -------- */
13652 /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
13653 Bool isI = opcode == BITS5(0,0,1,1,1);
13654 IRTemp src = newTemp(Ity_V128);
13655 IRTemp res = newTemp(Ity_V128);
13656 void* helper = isI ? &arm64g_dirtyhelper_AESIMC
13657 : &arm64g_dirtyhelper_AESMC;
13658 const HChar* hname = isI ? "arm64g_dirtyhelper_AESIMC"
13659 : "arm64g_dirtyhelper_AESMC";
13660 assign(src, getQReg128(nn));
13661 IRDirty* di
13662 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13663 mkIRExprVec_3(
13664 IRExpr_VECRET(),
13665 unop(Iop_V128HIto64, mkexpr(src)),
13666 unop(Iop_V128to64, mkexpr(src)) ) );
13667 stmt(IRStmt_Dirty(di));
13668 putQReg128(dd, mkexpr(res));
13669 DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
13670 nameQReg128(dd), nameQReg128(nn));
13671 return True;
13674 return False;
13675 # undef INSN
13679 static
13680 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13682 /* 31 28 23 21 20 15 14 11 9 4
13683 0101 1110 sz 0 m 0 opc 00 n d
13684 Decode fields are: sz,opc
13686 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13687 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
13688 || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
13689 return False;
13691 UInt sz = INSN(23,22);
13692 UInt mm = INSN(20,16);
13693 UInt opc = INSN(14,12);
13694 UInt nn = INSN(9,5);
13695 UInt dd = INSN(4,0);
13696 if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
13697 /* -------- 00,000 SHA1C Qd, Sn, Vm.4S -------- */
13698 /* -------- 00,001 SHA1P Qd, Sn, Vm.4S -------- */
13699 /* -------- 00,010 SHA1M Qd, Sn, Vm.4S -------- */
13700 /* -------- 00,011 SHA1SU0 Vd.4S, Vn.4S, Vm.4S -------- */
13701 /* -------- 00,100 SHA256H Qd, Qn, Vm.4S -------- */
13702 /* -------- 00,101 SHA256H2 Qd, Qn, Vm.4S -------- */
13703 /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
13704 vassert(opc < 7);
13705 const HChar* inames[7]
13706 = { "sha1c", "sha1p", "sha1m", "sha1su0",
13707 "sha256h", "sha256h2", "sha256su1" };
13708 void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
13709 = { &arm64g_dirtyhelper_SHA1C, &arm64g_dirtyhelper_SHA1P,
13710 &arm64g_dirtyhelper_SHA1M, &arm64g_dirtyhelper_SHA1SU0,
13711 &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
13712 &arm64g_dirtyhelper_SHA256SU1 };
13713 const HChar* hnames[7]
13714 = { "arm64g_dirtyhelper_SHA1C", "arm64g_dirtyhelper_SHA1P",
13715 "arm64g_dirtyhelper_SHA1M", "arm64g_dirtyhelper_SHA1SU0",
13716 "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
13717 "arm64g_dirtyhelper_SHA256SU1" };
13718 IRTemp vD = newTemp(Ity_V128);
13719 IRTemp vN = newTemp(Ity_V128);
13720 IRTemp vM = newTemp(Ity_V128);
13721 IRTemp vDhi = newTemp(Ity_I64);
13722 IRTemp vDlo = newTemp(Ity_I64);
13723 IRTemp vNhiPre = newTemp(Ity_I64);
13724 IRTemp vNloPre = newTemp(Ity_I64);
13725 IRTemp vNhi = newTemp(Ity_I64);
13726 IRTemp vNlo = newTemp(Ity_I64);
13727 IRTemp vMhi = newTemp(Ity_I64);
13728 IRTemp vMlo = newTemp(Ity_I64);
13729 assign(vD, getQReg128(dd));
13730 assign(vN, getQReg128(nn));
13731 assign(vM, getQReg128(mm));
13732 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13733 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13734 assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
13735 assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
13736 assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
13737 assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
13738 /* Mask off any bits of the N register operand that aren't actually
13739 needed, so that Memcheck doesn't complain unnecessarily. */
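/* For SHA1C/SHA1P/SHA1M the N operand is only the 32-bit Sn value (see
   the DIP cases below), so everything above bit 31 is forced to zero; the
   other four insns use all of Vn.4S and pass it through unchanged. */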
13740 switch (opc) {
13741 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13742 assign(vNhi, mkU64(0));
13743 assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
13744 break;
13745 case BITS3(0,1,1): case BITS3(1,0,0):
13746 case BITS3(1,0,1): case BITS3(1,1,0):
13747 assign(vNhi, mkexpr(vNhiPre));
13748 assign(vNlo, mkexpr(vNloPre));
13749 break;
13750 default:
13751 vassert(0);
13753 IRTemp res = newTemp(Ity_V128);
13754 IRDirty* di
13755 = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
13756 mkIRExprVec_7(
13757 IRExpr_VECRET(),
13758 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
13759 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
13760 stmt(IRStmt_Dirty(di));
13761 putQReg128(dd, mkexpr(res));
13762 switch (opc) {
13763 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13764 DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
13765 break;
13766 case BITS3(0,1,1): case BITS3(1,1,0):
13767 DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
13768 break;
13769 case BITS3(1,0,0): case BITS3(1,0,1):
13770 DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
13771 break;
13772 default:
13773 vassert(0);
13775 return True;
13778 return False;
13779 # undef INSN
13783 static
13784 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13786 /* 31 28 23 21 16 11 9 4
13787 0101 1110 sz 10100 opc 10 n d
13788 Decode fields are: sz,opc
13790 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13791 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
13792 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13793 return False;
13795 UInt sz = INSN(23,22);
13796 UInt opc = INSN(16,12);
13797 UInt nn = INSN(9,5);
13798 UInt dd = INSN(4,0);
13799 if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
13800 /* -------- 00,00000 SHA1H Sd, Sn -------- */
13801 /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
13802 /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
13803 vassert(opc < 3);
13804 const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
13805 IRTemp vD = newTemp(Ity_V128);
13806 IRTemp vN = newTemp(Ity_V128);
13807 IRTemp vDhi = newTemp(Ity_I64);
13808 IRTemp vDlo = newTemp(Ity_I64);
13809 IRTemp vNhi = newTemp(Ity_I64);
13810 IRTemp vNlo = newTemp(Ity_I64);
13811 assign(vD, getQReg128(dd));
13812 assign(vN, getQReg128(nn));
13813 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13814 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13815 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
13816 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
13817 /* Mask off any bits of the N register operand that aren't actually
13818 needed, so that Memcheck doesn't complain unnecessarily. Also
13819 construct the calls, given that the helper functions don't take
13820 the same number of arguments. */
13821 IRDirty* di = NULL;
13822 IRTemp res = newTemp(Ity_V128);
13823 switch (opc) {
13824 case BITS5(0,0,0,0,0): {
13825 IRExpr* vNloMasked = unop(Iop_32Uto64,
13826 unop(Iop_64to32, mkexpr(vNlo)));
13827 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13828 "arm64g_dirtyhelper_SHA1H",
13829 &arm64g_dirtyhelper_SHA1H,
13830 mkIRExprVec_3(
13831 IRExpr_VECRET(),
13832 mkU64(0), vNloMasked) );
13833 break;
13835 case BITS5(0,0,0,0,1):
13836 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13837 "arm64g_dirtyhelper_SHA1SU1",
13838 &arm64g_dirtyhelper_SHA1SU1,
13839 mkIRExprVec_5(
13840 IRExpr_VECRET(),
13841 mkexpr(vDhi), mkexpr(vDlo),
13842 mkexpr(vNhi), mkexpr(vNlo)) );
13843 break;
13844 case BITS5(0,0,0,1,0):
13845 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13846 "arm64g_dirtyhelper_SHA256SU0",
13847 &arm64g_dirtyhelper_SHA256SU0,
13848 mkIRExprVec_5(
13849 IRExpr_VECRET(),
13850 mkexpr(vDhi), mkexpr(vDlo),
13851 mkexpr(vNhi), mkexpr(vNlo)) );
13852 break;
13853 default:
13854 vassert(0);
13856 stmt(IRStmt_Dirty(di));
13857 putQReg128(dd, mkexpr(res));
13858 switch (opc) {
13859 case BITS5(0,0,0,0,0):
13860 DIP("%s s%u, s%u\n", inames[opc], dd, nn);
13861 break;
13862 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
13863 DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
13864 break;
13865 default:
13866 vassert(0);
13868 return True;
13871 return False;
13872 # undef INSN
13876 static
13877 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13879 /* 31 28 23 21 20 15 13 9 4
13880 000 11110 ty 1 m op 1000 n opcode2
13881 The first 3 bits are really "M 0 S", but M and S are always zero.
13882 Decode fields are: ty,op,opcode2
13884 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13885 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13886 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
13887 return False;
13889 UInt ty = INSN(23,22);
13890 UInt mm = INSN(20,16);
13891 UInt op = INSN(15,14);
13892 UInt nn = INSN(9,5);
13893 UInt opcode2 = INSN(4,0);
13894 vassert(ty < 4);
13896 if (ty <= X01 && op == X00
13897 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
13898 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
13899 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
13900 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
13901 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
13902 /* 31 23 20 15 9 4
13903 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
13904 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
13905 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
13906 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
13908 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
13909 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
13910 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
13911 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
13913 FCMPE generates Invalid Operation exn if either arg is any kind
13914 of NaN. FCMP generates Invalid Operation exn if either arg is a
13915 signalling NaN. We ignore this detail here and produce the same
13916 IR for both.
13918 Bool isD = (ty & 1) == 1;
13919 Bool isCMPE = (opcode2 & 16) == 16;
13920 Bool cmpZero = (opcode2 & 8) == 8;
13921 IRType ity = isD ? Ity_F64 : Ity_F32;
13922 Bool valid = True;
13923 if (cmpZero && mm != 0) valid = False;
13924 if (valid) {
13925 IRTemp argL = newTemp(ity);
13926 IRTemp argR = newTemp(ity);
13927 IRTemp irRes = newTemp(Ity_I32);
13928 assign(argL, getQRegLO(nn, ity));
13929 assign(argR,
13930 cmpZero
13931 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
13932 : getQRegLO(mm, ity));
13933 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13934 mkexpr(argL), mkexpr(argR)));
13935 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13936 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13937 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
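/* The 4-bit NZCV result is shifted into bits 31:28, the position the
   architectural flags occupy, and setFlags_COPY then installs it directly
   as the new flags value. */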
13938 setFlags_COPY(nzcv_28x0);
13939 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
13940 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
13941 return True;
13943 return False;
13946 return False;
13947 # undef INSN
13951 static
13952 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13954 /* 31 28 23 21 20 15 11 9 4 3
13955 000 11110 ty 1 m cond 01 n op nzcv
13956 The first 3 bits are really "M 0 S", but M and S are always zero.
13957 Decode fields are: ty,op
13959 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13960 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13961 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
13962 return False;
13964 UInt ty = INSN(23,22);
13965 UInt mm = INSN(20,16);
13966 UInt cond = INSN(15,12);
13967 UInt nn = INSN(9,5);
13968 UInt op = INSN(4,4);
13969 UInt nzcv = INSN(3,0);
13970 vassert(ty < 4 && op <= 1);
13972 if (ty <= BITS2(0,1)) {
13973 /* -------- 00,0 FCCMP s_s -------- */
13974 /* -------- 00,1 FCCMPE s_s -------- */
13975 /* -------- 01,0 FCCMP d_d -------- */
13976 /* -------- 01,1 FCCMPE d_d -------- */
13978 /* FCCMPE generates Invalid Operation exn if either arg is any kind
13979 of NaN. FCCMP generates Invalid Operation exn if either arg is a
13980 signalling NaN. We ignore this detail here and produce the same
13981 IR for both.
13983 Bool isD = (ty & 1) == 1;
13984 Bool isCMPE = op == 1;
13985 IRType ity = isD ? Ity_F64 : Ity_F32;
13986 IRTemp argL = newTemp(ity);
13987 IRTemp argR = newTemp(ity);
13988 IRTemp irRes = newTemp(Ity_I32);
13989 assign(argL, getQRegLO(nn, ity));
13990 assign(argR, getQRegLO(mm, ity));
13991 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13992 mkexpr(argL), mkexpr(argR)));
13993 IRTemp condT = newTemp(Ity_I1);
13994 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
13995 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13997 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
13998 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
14000 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
14002 IRTemp nzcv_28x0 = newTemp(Ity_I64);
14003 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
14004 mkexpr(nzcvT_28x0), nzcvF_28x0));
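/* If the condition holds, the flags come from the comparison just made;
   otherwise the immediate nzcv field is used unchanged, as the
   architecture specifies for FCCMP/FCCMPE. */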
14005 setFlags_COPY(nzcv_28x0);
14006 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
14007 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
14008 return True;
14011 return False;
14012 # undef INSN
14016 static
14017 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
14019 /* 31 23 21 20 15 11 9 5
14020 000 11110 ty 1 m cond 11 n d
14021 The first 3 bits are really "M 0 S", but M and S are always zero.
14022 Decode fields: ty
14024 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14025 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
14026 || INSN(11,10) != BITS2(1,1)) {
14027 return False;
14029 UInt ty = INSN(23,22);
14030 UInt mm = INSN(20,16);
14031 UInt cond = INSN(15,12);
14032 UInt nn = INSN(9,5);
14033 UInt dd = INSN(4,0);
14034 if (ty <= X01) {
14035 /* -------- 00: FCSEL s_s -------- */
14036 /* -------- 01: FCSEL d_d -------- */
14037 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
14038 IRTemp srcT = newTemp(ity);
14039 IRTemp srcF = newTemp(ity);
14040 IRTemp res = newTemp(ity);
14041 assign(srcT, getQRegLO(nn, ity));
14042 assign(srcF, getQRegLO(mm, ity));
14043 assign(res, IRExpr_ITE(
14044 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
14045 mkexpr(srcT), mkexpr(srcF)));
14046 putQReg128(dd, mkV128(0x0000));
14047 putQRegLO(dd, mkexpr(res));
14048 DIP("fcsel %s, %s, %s, %s\n",
14049 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
14050 nameCC(cond));
14051 return True;
14053 return False;
14054 # undef INSN
14058 static
14059 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
14061 /* 31 28 23 21 20 14 9 4
14062 000 11110 ty 1 opcode 10000 n d
14063 The first 3 bits are really "M 0 S", but M and S are always zero.
14064 Decode fields: ty,opcode
14066 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14067 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14068 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
14069 return False;
14071 UInt ty = INSN(23,22);
14072 UInt opcode = INSN(20,15);
14073 UInt nn = INSN(9,5);
14074 UInt dd = INSN(4,0);
14076 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
14077 /* -------- 0x,000000: FMOV d_d, s_s -------- */
14078 /* -------- 0x,000001: FABS d_d, s_s -------- */
14079 /* -------- 0x,000010: FNEG d_d, s_s -------- */
14080 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
14081 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
14082 IRTemp src = newTemp(ity);
14083 IRTemp res = newTemp(ity);
14084 const HChar* nm = "??";
14085 assign(src, getQRegLO(nn, ity));
14086 switch (opcode) {
14087 case BITS6(0,0,0,0,0,0):
14088 nm = "fmov"; assign(res, mkexpr(src)); break;
14089 case BITS6(0,0,0,0,0,1):
14090 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
14091 case BITS6(0,0,0,0,1,0):
14092 nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
14093 case BITS6(0,0,0,0,1,1):
14094 nm = "fsqrt";
14095 assign(res, binop(mkSQRTF(ity),
14096 mkexpr(mk_get_IR_rounding_mode()),
14097 mkexpr(src))); break;
14098 default:
14099 vassert(0);
14101 putQReg128(dd, mkV128(0x0000));
14102 putQRegLO(dd, mkexpr(res));
14103 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
14104 return True;
14107 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
14108 || opcode == BITS6(0,0,0,1,0,1)))
14109 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
14110 || opcode == BITS6(0,0,0,1,0,1)))
14111 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
14112 || opcode == BITS6(0,0,0,1,0,0)))) {
14113 /* -------- 11,000100: FCVT s_h -------- */
14114 /* -------- 11,000101: FCVT d_h -------- */
14115 /* -------- 00,000111: FCVT h_s -------- */
14116 /* -------- 00,000101: FCVT d_s -------- */
14117 /* -------- 01,000111: FCVT h_d -------- */
14118 /* -------- 01,000100: FCVT s_d -------- */
14119 /* 31 23 21 16 14 9 4
14120 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
14121 --------- 11 ----- 01 --------- FCVT Dd, Hn
14122 --------- 00 ----- 11 --------- FCVT Hd, Sn
14123 --------- 00 ----- 01 --------- FCVT Dd, Sn
14124 --------- 01 ----- 11 --------- FCVT Hd, Dn
14125 --------- 01 ----- 00 --------- FCVT Sd, Dn
14126 Rounding, when dst is smaller than src, is per the FPCR.
14128 UInt b2322 = ty;
14129 UInt b1615 = opcode & BITS2(1,1);
14130 switch ((b2322 << 2) | b1615) {
14131 case BITS4(0,0,0,1): // S -> D
14132 case BITS4(1,1,0,1): { // H -> D
14133 Bool srcIsH = b2322 == BITS2(1,1);
14134 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
14135 IRTemp res = newTemp(Ity_F64);
14136 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
14137 getQRegLO(nn, srcTy)));
14138 putQReg128(dd, mkV128(0x0000));
14139 putQRegLO(dd, mkexpr(res));
14140 DIP("fcvt %s, %s\n",
14141 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
14142 return True;
14144 case BITS4(0,1,0,0): // D -> S
14145 case BITS4(0,1,1,1): { // D -> H
14146 Bool dstIsH = b1615 == BITS2(1,1);
14147 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
14148 IRTemp res = newTemp(dstTy);
14149 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
14150 mkexpr(mk_get_IR_rounding_mode()),
14151 getQRegLO(nn, Ity_F64)));
14152 putQReg128(dd, mkV128(0x0000));
14153 putQRegLO(dd, mkexpr(res));
14154 DIP("fcvt %s, %s\n",
14155 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
14156 return True;
14158 case BITS4(0,0,1,1): // S -> H
14159 case BITS4(1,1,0,0): { // H -> S
14160 Bool toH = b1615 == BITS2(1,1);
14161 IRType srcTy = toH ? Ity_F32 : Ity_F16;
14162 IRType dstTy = toH ? Ity_F16 : Ity_F32;
14163 IRTemp res = newTemp(dstTy);
14164 if (toH) {
14165 assign(res, binop(Iop_F32toF16,
14166 mkexpr(mk_get_IR_rounding_mode()),
14167 getQRegLO(nn, srcTy)));
14169 } else {
14170 assign(res, unop(Iop_F16toF32,
14171 getQRegLO(nn, srcTy)));
14173 putQReg128(dd, mkV128(0x0000));
14174 putQRegLO(dd, mkexpr(res));
14175 DIP("fcvt %s, %s\n",
14176 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
14177 return True;
14179 default:
14180 break;
14182 /* else unhandled */
14183 return False;
14186 if (ty <= X01
14187 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
14188 && opcode != BITS6(0,0,1,1,0,1)) {
14189 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
14190 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
14191 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
14192 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
14193 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
14194 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
14195 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
14196 /* 31 23 21 17 14 9 4
14197 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
14199 x==0 => S-registers, x==1 => D-registers
14200 rm (17:15) encodings:
14201 111 per FPCR (FRINTI)
14202 001 +inf (FRINTP)
14203 010 -inf (FRINTM)
14204 011 zero (FRINTZ)
14205 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
14206 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
14207 110 per FPCR + "exact = TRUE" (FRINTX)
14208 101 unallocated
14210 Bool isD = (ty & 1) == 1;
14211 UInt rm = opcode & BITS6(0,0,0,1,1,1);
14212 IRType ity = isD ? Ity_F64 : Ity_F32;
14213 IRExpr* irrmE = NULL;
14214 UChar ch = '?';
14215 switch (rm) {
14216 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
14217 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
14218 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
14219 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
14220 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
14221 // I am unsure about the following, due to the "integral exact"
14222 // description in the manual. What does it mean? (frintx, that is)
14223 case BITS3(1,1,0):
14224 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
14225 case BITS3(1,1,1):
14226 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
14227 // The following is a kludge. There's no Irrm_ value to represent
14228 // this ("to nearest, with ties to even")
14229 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
14230 default: break;
14232 if (irrmE) {
14233 IRTemp src = newTemp(ity);
14234 IRTemp dst = newTemp(ity);
14235 assign(src, getQRegLO(nn, ity));
14236 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
14237 irrmE, mkexpr(src)));
14238 putQReg128(dd, mkV128(0x0000));
14239 putQRegLO(dd, mkexpr(dst));
14240 DIP("frint%c %s, %s\n",
14241 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
14242 return True;
14244 return False;
14247 return False;
14248 # undef INSN
14252 static
14253 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
14255 /* 31 28 23 21 20 15 11 9 4
14256 000 11110 ty 1 m opcode 10 n d
14257 The first 3 bits are really "M 0 S", but M and S are always zero.
14258 Decode fields: ty, opcode
14260 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14261 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14262 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
14263 return False;
14265 UInt ty = INSN(23,22);
14266 UInt mm = INSN(20,16);
14267 UInt opcode = INSN(15,12);
14268 UInt nn = INSN(9,5);
14269 UInt dd = INSN(4,0);
14271 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
14272 /* ------- 0x,0000: FMUL d_d, s_s ------- */
14273 /* ------- 0x,0001: FDIV d_d, s_s ------- */
14274 /* ------- 0x,0010: FADD d_d, s_s ------- */
14275 /* ------- 0x,0011: FSUB d_d, s_s ------- */
14276 /* ------- 0x,0100: FMAX d_d, s_s ------- */
14277 /* ------- 0x,0101: FMIN d_d, s_s ------- */
14278 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
14279 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
14280 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
14281 IROp iop = Iop_INVALID;
14282 const HChar* nm = "???";
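/* The first four opcodes (FMUL..FSUB) use the scalar F32/F64 IROps with
   an explicit rounding mode; FMAX/FMIN/FMAXNM/FMINNM instead reuse the
   lanewise vector ops on the full register and zero the unused upper
   lanes afterwards -- see the two paths below. */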
14283 switch (opcode) {
14284 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
14285 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
14286 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
14287 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
14288 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
14289 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
14290 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
14291 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
14292 default: vassert(0);
14294 if (opcode <= BITS4(0,0,1,1)) {
14295 // This is really not good code. TODO: avoid width-changing
14296 IRTemp res = newTemp(ity);
14297 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
14298 getQRegLO(nn, ity), getQRegLO(mm, ity)));
14299 putQReg128(dd, mkV128(0));
14300 putQRegLO(dd, mkexpr(res));
14301 } else {
14302 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
14303 binop(iop, getQReg128(nn), getQReg128(mm))));
14305 DIP("%s %s, %s, %s\n",
14306 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
14307 return True;
14310 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
14311 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
14312 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
14313 IROp iop = mkMULF(ity);
14314 IROp iopn = mkNEGF(ity);
14315 const HChar* nm = "fnmul";
14316 IRExpr* resE = unop(iopn,
14317 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
14318 getQRegLO(nn, ity), getQRegLO(mm, ity)));
14319 IRTemp res = newTemp(ity);
14320 assign(res, resE);
14321 putQReg128(dd, mkV128(0));
14322 putQRegLO(dd, mkexpr(res));
14323 DIP("%s %s, %s, %s\n",
14324 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
14325 return True;
14328 return False;
14329 # undef INSN
14333 static
14334 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
14336 /* 31 28 23 21 20 15 14 9 4
14337 000 11111 ty o1 m o0 a n d
14338 The first 3 bits are really "M 0 S", but M and S are always zero.
14339 Decode fields: ty,o1,o0
14341 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14342 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
14343 return False;
14345 UInt ty = INSN(23,22);
14346 UInt bitO1 = INSN(21,21);
14347 UInt mm = INSN(20,16);
14348 UInt bitO0 = INSN(15,15);
14349 UInt aa = INSN(14,10);
14350 UInt nn = INSN(9,5);
14351 UInt dd = INSN(4,0);
14352 vassert(ty < 4);
14354 if (ty <= X01) {
14355 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
14356 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
14357 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
14358 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
14359 /* -------------------- F{N}M{ADD,SUB} -------------------- */
14360 /* 31 22 20 15 14 9 4 ix
14361 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
14362 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
14363 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
14364 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
14365 where Fx=Dx when sz=1, Fx=Sx when sz=0
14367 -----SPEC------ ----IMPL----
14368 fmadd a + n * m a + n * m
14369 fmsub a + (-n) * m a - n * m
14370 fnmadd (-a) + (-n) * m -(a + n * m)
14371 fnmsub (-a) + n * m -(a - n * m)
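The IMPL column just pulls the negations outside the sum, so the two
columns agree as real-number identities, e.g. (-a) + (-n)*m == -(a + n*m).
*/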
14373 Bool isD = (ty & 1) == 1;
14374 UInt ix = (bitO1 << 1) | bitO0;
14375 IRType ity = isD ? Ity_F64 : Ity_F32;
14376 IROp opADD = mkADDF(ity);
14377 IROp opSUB = mkSUBF(ity);
14378 IROp opMUL = mkMULF(ity);
14379 IROp opNEG = mkNEGF(ity);
14380 IRTemp res = newTemp(ity);
14381 IRExpr* eA = getQRegLO(aa, ity);
14382 IRExpr* eN = getQRegLO(nn, ity);
14383 IRExpr* eM = getQRegLO(mm, ity);
14384 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
14385 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
14386 switch (ix) {
14387 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
14388 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
14389 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
14390 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
14391 default: vassert(0);
14393 putQReg128(dd, mkV128(0x0000));
14394 putQRegLO(dd, mkexpr(res));
14395 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
14396 DIP("%s %s, %s, %s, %s\n",
14397 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
14398 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
14399 return True;
14402 return False;
14403 # undef INSN
14407 static
14408 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
14410 /* 31 28 23 21 20 12 9 4
14411 000 11110 ty 1 imm8 100 imm5 d
14412 The first 3 bits are really "M 0 S", but M and S are always zero.
14414 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14415 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14416 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
14417 return False;
14419 UInt ty = INSN(23,22);
14420 UInt imm8 = INSN(20,13);
14421 UInt imm5 = INSN(9,5);
14422 UInt dd = INSN(4,0);
14424 /* ------- 00,00000: FMOV s_imm ------- */
14425 /* ------- 01,00000: FMOV d_imm ------- */
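/* VFPExpandImm produces the usual AArch64 8-bit FP immediate form, i.e.
   values of the shape +/- (16..31)/16 * 2^(-3..4); imm8 == 0x70, for
   example, expands to 1.0.  (Per the ARMv8 ARM description of the
   encoding; the expansion itself is done by the helper.) */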
14426 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
14427 Bool isD = (ty & 1) == 1;
14428 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
14429 if (!isD) {
14430 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
14432 putQReg128(dd, mkV128(0));
14433 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
14434 DIP("fmov %s, #0x%llx\n",
14435 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
14436 return True;
14439 return False;
14440 # undef INSN
14444 static
14445 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14447 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14448 /* 31 30 29 28 23 21 20 18 15 9 4
14449 sf 0 0 11110 type 0 rmode opcode scale n d
14450 The first 3 bits are really "sf 0 S", but S is always zero.
14451 Decode fields: sf,type,rmode,opcode
14453 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14454 if (INSN(30,29) != BITS2(0,0)
14455 || INSN(28,24) != BITS5(1,1,1,1,0)
14456 || INSN(21,21) != 0) {
14457 return False;
14459 UInt bitSF = INSN(31,31);
14460 UInt ty = INSN(23,22); // type
14461 UInt rm = INSN(20,19); // rmode
14462 UInt op = INSN(18,16); // opcode
14463 UInt sc = INSN(15,10); // scale
14464 UInt nn = INSN(9,5);
14465 UInt dd = INSN(4,0);
14467 if (ty <= X01 && rm == X11
14468 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
14469 /* -------- (ix) sf ty rm opc -------- */
14470 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
14471 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
14472 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
14473 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
14475 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
14476 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
14477 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
14478 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
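/* Worked example: "fcvtzs w0, s1, #3" has scale field 61, giving
   fbits = 64 - 61 = 3; the source is multiplied by 2^3 = 8 and then
   converted to integer with round-towards-zero. */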
14479 Bool isI64 = bitSF == 1;
14480 Bool isF64 = (ty & 1) == 1;
14481 Bool isU = (op & 1) == 1;
14482 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14484 Int fbits = 64 - sc;
14485 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14487 Double scale = two_to_the_plus(fbits);
14488 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14489 : IRExpr_Const(IRConst_F32( (Float)scale ));
14490 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14492 const IROp ops[8]
14493 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
14494 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
14495 IRTemp irrm = newTemp(Ity_I32);
14496 assign(irrm, mkU32(Irrm_ZERO));
14498 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
14499 IRExpr* res = binop(ops[ix], mkexpr(irrm),
14500 triop(opMUL, mkexpr(irrm), src, scaleE));
14501 putIRegOrZR(isI64, dd, res);
14503 DIP("fcvtz%c %s, %s, #%d\n",
14504 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
14505 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
14506 return True;
14509 /* ------ sf,ty,rm,opc ------ */
14510 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
14511 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
14512 /* (ix) sf S 28 ty rm opc 15 9 4
14513 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
14514 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
14515 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
14516 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
14518 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
14519 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
14520 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
14521 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
14523 These are signed/unsigned conversion from integer registers to
14524 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
14525 scaled per |scale|.
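Here the integer source is converted first and the result is then scaled
by 2^-fbits; "scvtf s0, w1, #4", for instance, divides the converted
value by 16.
*/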
14527 if (ty <= X01 && rm == X00
14528 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
14529 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
14530 Bool isI64 = bitSF == 1;
14531 Bool isF64 = (ty & 1) == 1;
14532 Bool isU = (op & 1) == 1;
14533 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14535 Int fbits = 64 - sc;
14536 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14538 Double scale = two_to_the_minus(fbits);
14539 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14540 : IRExpr_Const(IRConst_F32( (Float)scale ));
14541 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14543 const IROp ops[8]
14544 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14545 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14546 IRExpr* src = getIRegOrZR(isI64, nn);
14547 IRExpr* res = (isF64 && !isI64)
14548 ? unop(ops[ix], src)
14549 : binop(ops[ix],
14550 mkexpr(mk_get_IR_rounding_mode()), src);
14551 putQReg128(dd, mkV128(0));
14552 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
14554 DIP("%ccvtf %s, %s, #%d\n",
14555 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14556 nameIRegOrZR(isI64, nn), fbits);
14557 return True;
14560 return False;
14561 # undef INSN
14565 static
14566 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14568 /* 31 30 29 28 23 21 20 18 15 9 4
14569 sf 0 0 11110 type 1 rmode opcode 000000 n d
14570 The first 3 bits are really "sf 0 S", but S is always zero.
14571 Decode fields: sf,type,rmode,opcode
14573 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14574 if (INSN(30,29) != BITS2(0,0)
14575 || INSN(28,24) != BITS5(1,1,1,1,0)
14576 || INSN(21,21) != 1
14577 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
14578 return False;
14580 UInt bitSF = INSN(31,31);
14581 UInt ty = INSN(23,22); // type
14582 UInt rm = INSN(20,19); // rmode
14583 UInt op = INSN(18,16); // opcode
14584 UInt nn = INSN(9,5);
14585 UInt dd = INSN(4,0);
14587 // op = 000, 001
14588 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
14589 /* 30 23 20 18 15 9 4
14590 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
14591 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
14592 ---------------- 01 -------------- FCVTP-------- (round to +inf)
14593 ---------------- 10 -------------- FCVTM-------- (round to -inf)
14594 ---------------- 11 -------------- FCVTZ-------- (round to zero)
14595 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
14596 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
14598 Rd is Xd when sf==1, Wd when sf==0
14599 Fn is Dn when x==1, Sn when x==0
14600 20:19 carry the rounding mode, using the same encoding as FPCR
14602 if (ty <= X01
14603 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
14604 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
14607 Bool isI64 = bitSF == 1;
14608 Bool isF64 = (ty & 1) == 1;
14609 Bool isU = (op & 1) == 1;
14610 /* Decide on the IR rounding mode to use. */
14611 IRRoundingMode irrm = 8; /*impossible*/
14612 HChar ch = '?';
14613 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
14614 switch (rm) {
14615 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
14616 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
14617 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
14618 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
14619 default: vassert(0);
14621 } else {
14622 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
14623 switch (rm) {
14624 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
14625 default: vassert(0);
14628 vassert(irrm != 8);
14629 /* Decide on the conversion primop, based on the source size,
14630 dest size and signedness (8 possibilities). Case coding:
14631 F32 ->s I32 0
14632 F32 ->u I32 1
14633 F32 ->s I64 2
14634 F32 ->u I64 3
14635 F64 ->s I32 4
14636 F64 ->u I32 5
14637 F64 ->s I64 6
14638 F64 ->u I64 7
14640 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
14641 vassert(ix < 8);
14642 const IROp iops[8]
14643 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
14644 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
14645 IROp iop = iops[ix];
14646 // A bit of ATCery: bounce all cases we haven't seen an example of.
14647 if (/* F32toI32S */
14648 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
14649 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
14650 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
14651 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
14652 /* F32toI32U */
14653 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
14654 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
14655 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
14656 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
14657 /* F32toI64S */
14658 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
14659 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
14660 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
14661 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
14662 /* F32toI64U */
14663 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
14664 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
14665 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
14666 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
14667 /* F64toI32S */
14668 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
14669 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
14670 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
14671 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
14672 /* F64toI32U */
14673 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
14674 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
14675 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
14676 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
14677 /* F64toI64S */
14678 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
14679 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
14680 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
14681 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
14682 /* F64toI64U */
14683 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
14684 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
14685 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
14686 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
14688 /* validated */
14689 } else {
14690 return False;
14692 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
14693 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
14694 IRTemp src = newTemp(srcTy);
14695 IRTemp dst = newTemp(dstTy);
14696 assign(src, getQRegLO(nn, srcTy));
14697 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
14698 putIRegOrZR(isI64, dd, mkexpr(dst));
14699 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
14700 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
14701 return True;
14704 // op = 010, 011
14705 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
14706 /* (ix) sf S 28 ty rm op 15 9 4
14707 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
14708 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
14709 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
14710 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
14712 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
14713 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
14714 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
14715 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
14717 These are signed/unsigned conversion from integer registers to
14718 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
14720 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
14721 Bool isI64 = bitSF == 1;
14722 Bool isF64 = (ty & 1) == 1;
14723 Bool isU = (op & 1) == 1;
14724 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14725 const IROp ops[8]
14726 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14727 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14728 IRExpr* src = getIRegOrZR(isI64, nn);
14729 IRExpr* res = (isF64 && !isI64)
14730 ? unop(ops[ix], src)
14731 : binop(ops[ix],
14732 mkexpr(mk_get_IR_rounding_mode()), src);
14733 putQReg128(dd, mkV128(0));
14734 putQRegLO(dd, res);
14735 DIP("%ccvtf %s, %s\n",
14736 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14737 nameIRegOrZR(isI64, nn));
14738 return True;
14741 // op = 110, 111
14742 /* -------- FMOV (general) -------- */
14743 /* case sf S ty rm op 15 9 4
14744 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
14745 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
14746 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
14748 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
14749 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
14750 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
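Cases (3) and (6) touch only the upper 64-bit half of the vector
register; in particular (3) writes Vd.D[1] and leaves D[0] unchanged,
which is why it uses putQRegHI64 rather than zeroing the whole register
first.
*/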
14752 if (1) {
14753 UInt ix = 0; // case
14754 if (bitSF == 0) {
14755 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14756 ix = 1;
14757 else
14758 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14759 ix = 4;
14760 } else {
14761 vassert(bitSF == 1);
14762 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14763 ix = 2;
14764 else
14765 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14766 ix = 5;
14767 else
14768 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
14769 ix = 3;
14770 else
14771 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
14772 ix = 6;
14774 if (ix > 0) {
14775 switch (ix) {
14776 case 1:
14777 putQReg128(dd, mkV128(0));
14778 putQRegLO(dd, getIReg32orZR(nn));
14779 DIP("fmov s%u, w%u\n", dd, nn);
14780 break;
14781 case 2:
14782 putQReg128(dd, mkV128(0));
14783 putQRegLO(dd, getIReg64orZR(nn));
14784 DIP("fmov d%u, x%u\n", dd, nn);
14785 break;
14786 case 3:
14787 putQRegHI64(dd, getIReg64orZR(nn));
14788 DIP("fmov v%u.d[1], x%u\n", dd, nn);
14789 break;
14790 case 4:
14791 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
14792 DIP("fmov w%u, s%u\n", dd, nn);
14793 break;
14794 case 5:
14795 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
14796 DIP("fmov x%u, d%u\n", dd, nn);
14797 break;
14798 case 6:
14799 putIReg64orZR(dd, getQRegHI64(nn));
14800 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
14801 break;
14802 default:
14803 vassert(0);
14805 return True;
14807 /* undecodable; fall through */
14810 return False;
14811 # undef INSN
14812 }
14815 static
14816 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
14817 {
14818 Bool ok;
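/* Try each second-level decoder in turn until one accepts the
   instruction. The UNLIKELY hints reflect the fact that, for any
   given instruction, most of these calls are expected to fail. */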
14819 ok = dis_AdvSIMD_EXT(dres, insn);
14820 if (UNLIKELY(ok)) return True;
14821 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
14822 if (UNLIKELY(ok)) return True;
14823 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
14824 if (UNLIKELY(ok)) return True;
14825 ok = dis_AdvSIMD_across_lanes(dres, insn);
14826 if (UNLIKELY(ok)) return True;
14827 ok = dis_AdvSIMD_copy(dres, insn);
14828 if (UNLIKELY(ok)) return True;
14829 ok = dis_AdvSIMD_modified_immediate(dres, insn);
14830 if (UNLIKELY(ok)) return True;
14831 ok = dis_AdvSIMD_scalar_copy(dres, insn);
14832 if (UNLIKELY(ok)) return True;
14833 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
14834 if (UNLIKELY(ok)) return True;
14835 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
14836 if (UNLIKELY(ok)) return True;
14837 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
14838 if (UNLIKELY(ok)) return True;
14839 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
14840 if (UNLIKELY(ok)) return True;
14841 ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn);
14842 if (UNLIKELY(ok)) return True;
14843 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
14844 if (UNLIKELY(ok)) return True;
14845 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
14846 if (UNLIKELY(ok)) return True;
14847 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
14848 if (UNLIKELY(ok)) return True;
14849 ok = dis_AdvSIMD_three_different(dres, insn);
14850 if (UNLIKELY(ok)) return True;
14851 ok = dis_AdvSIMD_three_same(dres, insn);
14852 if (UNLIKELY(ok)) return True;
14853 ok = dis_AdvSIMD_three_same_extra(dres, insn);
14854 if (UNLIKELY(ok)) return True;
14855 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
14856 if (UNLIKELY(ok)) return True;
14857 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
14858 if (UNLIKELY(ok)) return True;
14859 ok = dis_AdvSIMD_crypto_aes(dres, insn);
14860 if (UNLIKELY(ok)) return True;
14861 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
14862 if (UNLIKELY(ok)) return True;
14863 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
14864 if (UNLIKELY(ok)) return True;
14865 ok = dis_AdvSIMD_fp_compare(dres, insn);
14866 if (UNLIKELY(ok)) return True;
14867 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
14868 if (UNLIKELY(ok)) return True;
14869 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
14870 if (UNLIKELY(ok)) return True;
14871 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
14872 if (UNLIKELY(ok)) return True;
14873 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
14874 if (UNLIKELY(ok)) return True;
14875 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
14876 if (UNLIKELY(ok)) return True;
14877 ok = dis_AdvSIMD_fp_immediate(dres, insn);
14878 if (UNLIKELY(ok)) return True;
14879 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
14880 if (UNLIKELY(ok)) return True;
14881 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
14882 if (UNLIKELY(ok)) return True;
14883 return False;
14884 }
14887 /*------------------------------------------------------------*/
14888 /*--- Disassemble a single ARM64 instruction ---*/
14889 /*------------------------------------------------------------*/
14891 /* Disassemble a single ARM64 instruction into IR. The instruction
14892 is located at |guest_instr| and has guest IP of
14893 |guest_PC_curr_instr|, which will have been set before the call
14894 here. Returns True iff the instruction was decoded, in which case
14895 *dres will be set accordingly, or False, in which case *dres should
14896 be ignored by the caller. */
14898 static
14899 Bool disInstr_ARM64_WRK (
14900 /*MB_OUT*/DisResult* dres,
14901 const UChar* guest_instr,
14902 const VexArchInfo* archinfo,
14903 const VexAbiInfo* abiinfo,
14904 Bool sigill_diag
14905 )
14906 {
14907 // A macro to fish bits out of 'insn'.
14908 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14910 //ZZ DisResult dres;
14911 //ZZ UInt insn;
14912 //ZZ //Bool allow_VFP = False;
14913 //ZZ //UInt hwcaps = archinfo->hwcaps;
14914 //ZZ IRTemp condT; /* :: Ity_I32 */
14915 //ZZ UInt summary;
14916 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
14917 //ZZ
14918 //ZZ /* What insn variants are we supporting today? */
14919 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14920 //ZZ // etc etc
14922 /* Set result defaults. */
14923 dres->whatNext = Dis_Continue;
14924 dres->len = 4;
14925 dres->jk_StopHere = Ijk_INVALID;
14926 dres->hint = Dis_HintNone;
14928 /* At least this is simple on ARM64: insns are all 4 bytes long, and
14929 4-aligned. So just fish the whole thing out of memory right now
14930 and have done. */
14931 UInt insn = getUIntLittleEndianly( guest_instr );
14933 if (0) vex_printf("insn: 0x%x\n", insn);
14935 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
14937 vassert(0 == (guest_PC_curr_instr & 3ULL));
14939 /* ----------------------------------------------------------- */
14941 /* Spot "Special" instructions (see comment at top of file). */
14943 const UChar* code = guest_instr;
14944 /* Spot the 16-byte preamble:
14945 93CC0D8C ror x12, x12, #3
14946 93CC358C ror x12, x12, #13
14947 93CCCD8C ror x12, x12, #51
14948 93CCF58C ror x12, x12, #61
14949 */
14950 UInt word1 = 0x93CC0D8C;
14951 UInt word2 = 0x93CC358C;
14952 UInt word3 = 0x93CCCD8C;
14953 UInt word4 = 0x93CCF58C;
14954 if (getUIntLittleEndianly(code+ 0) == word1 &&
14955 getUIntLittleEndianly(code+ 4) == word2 &&
14956 getUIntLittleEndianly(code+ 8) == word3 &&
14957 getUIntLittleEndianly(code+12) == word4) {
14958 /* Got a "Special" instruction preamble. Which one is it? */
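/* Each special sequence occupies 20 bytes in total: the 16-byte
   preamble plus the 4-byte marker insn following it. Hence the
   continuation address of guest_PC_curr_instr + 20 and, for the
   fall-through (guest_NRADDR) case, dres->len = 20. */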
14959 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
14960 /* orr x10,x10,x10 */) {
14961 /* X3 = client_request ( X4 ) */
14962 DIP("x3 = client_request ( x4 )\n");
14963 putPC(mkU64( guest_PC_curr_instr + 20 ));
14964 dres->jk_StopHere = Ijk_ClientReq;
14965 dres->whatNext = Dis_StopHere;
14966 return True;
14967 }
14968 else
14969 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
14970 /* orr x11,x11,x11 */) {
14971 /* X3 = guest_NRADDR */
14972 DIP("x3 = guest_NRADDR\n");
14973 dres->len = 20;
14974 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
14975 return True;
14976 }
14977 else
14978 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
14979 /* orr x12,x12,x12 */) {
14980 /* branch-and-link-to-noredir X8 */
14981 DIP("branch-and-link-to-noredir x8\n");
14982 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
14983 putPC(getIReg64orZR(8));
14984 dres->jk_StopHere = Ijk_NoRedir;
14985 dres->whatNext = Dis_StopHere;
14986 return True;
14987 }
14988 else
14989 if (getUIntLittleEndianly(code+16) == 0xAA090129
14990 /* orr x9,x9,x9 */) {
14991 /* IR injection */
14992 DIP("IR injection\n");
14993 vex_inject_ir(irsb, Iend_LE);
14994 // Invalidate the current insn. The reason is that the IR we're
14995 // injecting here can change, in which case the translation has to
14996 // be redone. For ease of handling, we simply invalidate all the
14997 // time.
14998 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
14999 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
15000 putPC(mkU64( guest_PC_curr_instr + 20 ));
15001 dres->whatNext = Dis_StopHere;
15002 dres->jk_StopHere = Ijk_InvalICache;
15003 return True;
15004 }
15005 /* We don't know what it is. */
15006 return False;
15007 /*NOTREACHED*/
15008 }
15011 /* ----------------------------------------------------------- */
15013 /* Main ARM64 instruction decoder starts here. */
15015 Bool ok = False;
15017 /* insn[28:25] determines the top-level grouping, so let's start
15018 off with that.
15020 For all of these dis_ARM64_ functions, we pass *dres with the
15021 normal default results "insn OK, 4 bytes long, keep decoding" so
15022 they don't need to change it. However, decodes of control-flow
15023 insns may cause *dres to change.
15024 */
15025 switch (INSN(28,25)) {
15026 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
15027 // Data processing - immediate
15028 ok = dis_ARM64_data_processing_immediate(dres, insn, sigill_diag);
15029 break;
15030 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
15031 // Branch, exception generation and system instructions
15032 ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo, sigill_diag);
15033 break;
15034 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
15035 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
15036 // Loads and stores
15037 ok = dis_ARM64_load_store(dres, insn, abiinfo, sigill_diag);
15038 break;
15039 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
15040 // Data processing - register
15041 ok = dis_ARM64_data_processing_register(dres, insn, sigill_diag);
15042 break;
15043 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
15044 // Data processing - SIMD and floating point
15045 ok = dis_ARM64_simd_and_fp(dres, insn);
15046 break;
15047 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
15048 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
15049 // UNALLOCATED
15050 break;
15051 default:
15052 vassert(0); /* Can't happen */
15053 }
15055 /* If the next-level down decoders failed, make sure |dres| didn't
15056 get changed. */
15057 if (!ok) {
15058 vassert(dres->whatNext == Dis_Continue);
15059 vassert(dres->len == 4);
15060 vassert(dres->jk_StopHere == Ijk_INVALID);
15061 }
15063 return ok;
15065 # undef INSN
15066 }
15069 /*------------------------------------------------------------*/
15070 /*--- Top-level fn ---*/
15071 /*------------------------------------------------------------*/
15073 /* Disassemble a single instruction into IR. The instruction
15074 is located in host memory at &guest_code[delta]. */
15076 DisResult disInstr_ARM64 ( IRSB* irsb_IN,
15077 const UChar* guest_code_IN,
15078 Long delta_IN,
15079 Addr guest_IP,
15080 VexArch guest_arch,
15081 const VexArchInfo* archinfo,
15082 const VexAbiInfo* abiinfo,
15083 VexEndness host_endness_IN,
15084 Bool sigill_diag_IN )
15085 {
15086 DisResult dres;
15087 vex_bzero(&dres, sizeof(dres));
15089 /* Set globals (see top of this file) */
15090 vassert(guest_arch == VexArchARM64);
15092 irsb = irsb_IN;
15093 host_endness = host_endness_IN;
15094 guest_PC_curr_instr = (Addr64)guest_IP;
15096 /* Sanity checks */
15097 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
15098 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
15099 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
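/* That is, the guest's minimum D- and I-cache line sizes must lie
   between 4 bytes (2^2) and 128KB (2^17). */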
15101 /* Try to decode */
15102 Bool ok = disInstr_ARM64_WRK( &dres,
15103 &guest_code_IN[delta_IN],
15104 archinfo, abiinfo, sigill_diag_IN );
15105 if (ok) {
15106 /* All decode successes end up here. */
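/* A length of 20 can only result from the "Special" instruction
   sequences handled above; everything else is a single 4-byte insn. */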
15107 vassert(dres.len == 4 || dres.len == 20);
15108 switch (dres.whatNext) {
15109 case Dis_Continue:
15110 putPC( mkU64(dres.len + guest_PC_curr_instr) );
15111 break;
15112 case Dis_StopHere:
15113 break;
15114 default:
15115 vassert(0);
15116 }
15117 DIP("\n");
15118 } else {
15119 /* All decode failures end up here. */
15120 if (sigill_diag_IN) {
15121 Int i, j;
15122 UChar buf[64];
15123 UInt insn
15124 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
15125 vex_bzero(buf, sizeof(buf));
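/* Render the insn as binary, MSB first, formatted as
   bbbb'bbbb bbbb'bbbb bbbb'bbbb bbbb'bbbb for readability. */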
15126 for (i = j = 0; i < 32; i++) {
15127 if (i > 0) {
15128 if ((i & 7) == 0) buf[j++] = ' ';
15129 else if ((i & 3) == 0) buf[j++] = '\'';
15130 }
15131 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
15132 }
15133 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
15134 vex_printf("disInstr(arm64): %s\n", buf);
15135 }
15137 /* Tell the dispatcher that this insn cannot be decoded, and so
15138 has not been executed, and (is currently) the next to be
15139 executed. PC should be up-to-date since it is made so at the
15140 start of each insn, but nevertheless be paranoid and update
15141 it again right now. */
15142 putPC( mkU64(guest_PC_curr_instr) );
15143 dres.len = 0;
15144 dres.whatNext = Dis_StopHere;
15145 dres.jk_StopHere = Ijk_NoDecode;
15146 }
15147 return dres;
15148 }
15151 /*--------------------------------------------------------------------*/
15152 /*--- end guest_arm64_toIR.c ---*/
15153 /*--------------------------------------------------------------------*/