Bug 431556 - Complete arm64 FADDP v8.2 instruction support started in 413547.
[valgrind.git] / VEX / priv / guest_arm64_toIR.c
blob 89231be29d01fe3c257da8a179fc43b457bb81a7
1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2013-2017 OpenWorks
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 /* KNOWN LIMITATIONS 2014-Nov-16
32 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
34 Also FP comparison "unordered" .. is implemented as normal FP
35 comparison.
37 Both should be fixed. They behave incorrectly in the presence of
38 NaNs.
40 FMULX is treated the same as FMUL. That's also not correct.
42 * Floating multiply-add (etc) insns are split into a multiply and
43 an add, and so suffer double rounding and hence sometimes the
44 least significant mantissa bit is incorrect. Fix: use the IR
45 multiply-add IROps instead.
47 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
48 handling for the "ties" case. FRINTX might be dubious too.
50 * Ditto FCVTXN. No idea what "round to odd" means. This implementation
51 just rounds to nearest.
54 /* "Special" instructions.
56 This instruction decoder can decode four special instructions
57 which mean nothing natively (are no-ops as far as regs/mem are
58 concerned) but have meaning for supporting Valgrind. A special
59 instruction is flagged by a 16-byte preamble:
61 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
62 (ror x12, x12, #3; ror x12, x12, #13
63 ror x12, x12, #51; ror x12, x12, #61)
65 Following that, one of the following 4 is allowed
66 (standard interpretation in parentheses):
68 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
69 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
70 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
71 AA090129 (orr x9,x9,x9) IR injection
73 Any other bytes following the 16-byte preamble are illegal and
74 constitute a failure in instruction decoding. This all assumes
75 that the preamble will never occur except in specific code
76 fragments designed for Valgrind to catch.
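/* A minimal illustrative sketch (not part of the decoder itself) of how
   the 16-byte preamble above could be recognised, assuming the four words
   have already been fetched little-endianly.  The helper name here is
   hypothetical; the actual check is performed in the main decode routine
   later in this file.

      static Bool isSpecialPreamble ( const UChar* code )
      {
         return getUIntLittleEndianly(code +  0) == 0x93CC0D8C
             && getUIntLittleEndianly(code +  4) == 0x93CC358C
             && getUIntLittleEndianly(code +  8) == 0x93CCCD8C
             && getUIntLittleEndianly(code + 12) == 0x93CCF58C;
      }
*/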
79 /* Translates ARM64 code to IR. */
81 #include "libvex_basictypes.h"
82 #include "libvex_ir.h"
83 #include "libvex.h"
84 #include "libvex_guest_arm64.h"
86 #include "main_util.h"
87 #include "main_globals.h"
88 #include "guest_generic_bb_to_IR.h"
89 #include "guest_arm64_defs.h"
92 /*------------------------------------------------------------*/
93 /*--- Globals ---*/
94 /*------------------------------------------------------------*/
96 /* These are set at the start of the translation of an instruction, so
97 that we don't have to pass them around endlessly. CONST means does
98 not change during translation of the instruction.
101 /* CONST: what is the host's endianness? We need to know this in
102 order to do sub-register accesses to the SIMD/FP registers
103 correctly. */
104 static VexEndness host_endness;
106 /* CONST: The guest address for the instruction currently being
107 translated. */
108 static Addr64 guest_PC_curr_instr;
110 /* MOD: The IRSB* into which we're generating code. */
111 static IRSB* irsb;
114 /*------------------------------------------------------------*/
115 /*--- Debugging output ---*/
116 /*------------------------------------------------------------*/
118 #define DIP(format, args...) \
119 if (vex_traceflags & VEX_TRACE_FE) \
120 vex_printf(format, ## args)
122 #define DIS(buf, format, args...) \
123 if (vex_traceflags & VEX_TRACE_FE) \
124 vex_sprintf(buf, format, ## args)
127 /*------------------------------------------------------------*/
128 /*--- Helper bits and pieces for deconstructing the ---*/
129 /*--- arm insn stream. ---*/
130 /*------------------------------------------------------------*/
132 /* Do a little-endian load of a 32-bit word, regardless of the
133 endianness of the underlying host. */
134 static inline UInt getUIntLittleEndianly ( const UChar* p )
136 UInt w = 0;
137 w = (w << 8) | p[3];
138 w = (w << 8) | p[2];
139 w = (w << 8) | p[1];
140 w = (w << 8) | p[0];
141 return w;
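/* For example, given the byte sequence { 0x78, 0x56, 0x34, 0x12 } this
   returns 0x12345678, irrespective of the host's own byte order. */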
144 /* Sign extend an N-bit value up to 64 bits, by copying
145 bit N-1 into all higher positions. */
146 static ULong sx_to_64 ( ULong x, UInt n )
148 vassert(n > 1 && n < 64);
149 x <<= (64-n);
150 Long r = (Long)x;
151 r >>= (64-n);
152 return (ULong)r;
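/* For example, sx_to_64(0x80, 8) == 0xFFFFFFFFFFFFFF80ULL, whereas
   sx_to_64(0x7F, 8) == 0x7FULL, since bit 7 is the sign bit of an
   8-bit value. */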
155 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
156 //ZZ endianness of the underlying host. */
157 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
158 //ZZ {
159 //ZZ UShort w = 0;
160 //ZZ w = (w << 8) | p[1];
161 //ZZ w = (w << 8) | p[0];
162 //ZZ return w;
163 //ZZ }
164 //ZZ
165 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
166 //ZZ vassert(sh >= 0 && sh < 32);
167 //ZZ if (sh == 0)
168 //ZZ return x;
169 //ZZ else
170 //ZZ return (x << (32-sh)) | (x >> sh);
171 //ZZ }
172 //ZZ
173 //ZZ static Int popcount32 ( UInt x )
174 //ZZ {
175 //ZZ Int res = 0, i;
176 //ZZ for (i = 0; i < 32; i++) {
177 //ZZ res += (x & 1);
178 //ZZ x >>= 1;
179 //ZZ }
180 //ZZ return res;
181 //ZZ }
182 //ZZ
183 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
184 //ZZ {
185 //ZZ UInt mask = 1 << ix;
186 //ZZ x &= ~mask;
187 //ZZ x |= ((b << ix) & mask);
188 //ZZ return x;
189 //ZZ }
191 #define BITS2(_b1,_b0) \
192 (((_b1) << 1) | (_b0))
194 #define BITS3(_b2,_b1,_b0) \
195 (((_b2) << 2) | ((_b1) << 1) | (_b0))
197 #define BITS4(_b3,_b2,_b1,_b0) \
198 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
200 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
201 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
202 | BITS4((_b3),(_b2),(_b1),(_b0)))
204 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
205 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
206 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
207 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
208 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
209 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
211 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
212 (((_b8) << 8) \
213 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
215 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
216 (((_b9) << 9) | ((_b8) << 8) \
217 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
219 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
220 (((_b10) << 10) \
221 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
223 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
224 (((_b11) << 11) \
225 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
227 #define X00 BITS2(0,0)
228 #define X01 BITS2(0,1)
229 #define X10 BITS2(1,0)
230 #define X11 BITS2(1,1)
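/* For example, BITS4(1,0,1,1) == 0xB and BITS8(1,1,0,0,1,1,0,0) == 0xCC,
   so decode patterns can be written out bit by bit and compared directly
   against fields sliced from the instruction word. */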
232 // produces _uint[_bMax:_bMin]
233 #define SLICE_UInt(_uint,_bMax,_bMin) \
234 (( ((UInt)(_uint)) >> (_bMin)) \
235 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
238 /*------------------------------------------------------------*/
239 /*--- Helper bits and pieces for creating IR fragments. ---*/
240 /*------------------------------------------------------------*/
242 static IRExpr* mkV128 ( UShort w )
244 return IRExpr_Const(IRConst_V128(w));
247 static IRExpr* mkU64 ( ULong i )
249 return IRExpr_Const(IRConst_U64(i));
252 static IRExpr* mkU32 ( UInt i )
254 return IRExpr_Const(IRConst_U32(i));
257 static IRExpr* mkU16 ( UInt i )
259 vassert(i < 65536);
260 return IRExpr_Const(IRConst_U16(i));
263 static IRExpr* mkU8 ( UInt i )
265 vassert(i < 256);
266 return IRExpr_Const(IRConst_U8( (UChar)i ));
269 static IRExpr* mkexpr ( IRTemp tmp )
271 return IRExpr_RdTmp(tmp);
274 static IRExpr* unop ( IROp op, IRExpr* a )
276 return IRExpr_Unop(op, a);
279 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
281 return IRExpr_Binop(op, a1, a2);
284 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
286 return IRExpr_Triop(op, a1, a2, a3);
289 static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2,
290 IRExpr* a3, IRExpr* a4 )
292 return IRExpr_Qop(op, a1, a2, a3, a4);
295 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
297 return IRExpr_Load(Iend_LE, ty, addr);
300 /* Add a statement to the list held by "irsb". */
301 static void stmt ( IRStmt* st )
303 addStmtToIRSB( irsb, st );
306 static void assign ( IRTemp dst, IRExpr* e )
308 stmt( IRStmt_WrTmp(dst, e) );
311 static void storeLE ( IRExpr* addr, IRExpr* data )
313 stmt( IRStmt_Store(Iend_LE, addr, data) );
316 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
317 //ZZ {
318 //ZZ if (guardT == IRTemp_INVALID) {
319 //ZZ /* unconditional */
320 //ZZ storeLE(addr, data);
321 //ZZ } else {
322 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
323 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
324 //ZZ }
325 //ZZ }
326 //ZZ
327 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
328 //ZZ IRExpr* addr, IRExpr* alt,
329 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
330 //ZZ {
331 //ZZ if (guardT == IRTemp_INVALID) {
332 //ZZ /* unconditional */
333 //ZZ IRExpr* loaded = NULL;
334 //ZZ switch (cvt) {
335 //ZZ case ILGop_Ident32:
336 //ZZ loaded = loadLE(Ity_I32, addr); break;
337 //ZZ case ILGop_8Uto32:
338 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
339 //ZZ case ILGop_8Sto32:
340 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
341 //ZZ case ILGop_16Uto32:
342 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
343 //ZZ case ILGop_16Sto32:
344 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
345 //ZZ default:
346 //ZZ vassert(0);
347 //ZZ }
348 //ZZ vassert(loaded != NULL);
349 //ZZ assign(dst, loaded);
350 //ZZ } else {
351 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
352 //ZZ loaded data before putting the data in 'dst'. If the load
353 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
354 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
355 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
356 //ZZ }
357 //ZZ }
359 /* Generate a new temporary of the given type. */
360 static IRTemp newTemp ( IRType ty )
362 vassert(isPlausibleIRType(ty));
363 return newIRTemp( irsb->tyenv, ty );
366 /* This is used in many places, so the brevity is an advantage. */
367 static IRTemp newTempV128(void)
369 return newTemp(Ity_V128);
372 /* Initialise V128 temporaries en masse. */
373 static
374 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
376 vassert(t1 && *t1 == IRTemp_INVALID);
377 vassert(t2 && *t2 == IRTemp_INVALID);
378 *t1 = newTempV128();
379 *t2 = newTempV128();
382 static
383 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
385 vassert(t1 && *t1 == IRTemp_INVALID);
386 vassert(t2 && *t2 == IRTemp_INVALID);
387 vassert(t3 && *t3 == IRTemp_INVALID);
388 *t1 = newTempV128();
389 *t2 = newTempV128();
390 *t3 = newTempV128();
393 static
394 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
396 vassert(t1 && *t1 == IRTemp_INVALID);
397 vassert(t2 && *t2 == IRTemp_INVALID);
398 vassert(t3 && *t3 == IRTemp_INVALID);
399 vassert(t4 && *t4 == IRTemp_INVALID);
400 *t1 = newTempV128();
401 *t2 = newTempV128();
402 *t3 = newTempV128();
403 *t4 = newTempV128();
406 static
407 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
408 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
410 vassert(t1 && *t1 == IRTemp_INVALID);
411 vassert(t2 && *t2 == IRTemp_INVALID);
412 vassert(t3 && *t3 == IRTemp_INVALID);
413 vassert(t4 && *t4 == IRTemp_INVALID);
414 vassert(t5 && *t5 == IRTemp_INVALID);
415 vassert(t6 && *t6 == IRTemp_INVALID);
416 vassert(t7 && *t7 == IRTemp_INVALID);
417 *t1 = newTempV128();
418 *t2 = newTempV128();
419 *t3 = newTempV128();
420 *t4 = newTempV128();
421 *t5 = newTempV128();
422 *t6 = newTempV128();
423 *t7 = newTempV128();
426 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
427 //ZZ IRRoundingMode. */
428 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
429 //ZZ {
430 //ZZ return mkU32(Irrm_NEAREST);
431 //ZZ }
432 //ZZ
433 //ZZ /* Generate an expression for SRC rotated right by ROT. */
434 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
435 //ZZ {
436 //ZZ vassert(rot >= 0 && rot < 32);
437 //ZZ if (rot == 0)
438 //ZZ return mkexpr(src);
439 //ZZ return
440 //ZZ binop(Iop_Or32,
441 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
442 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
443 //ZZ }
444 //ZZ
445 //ZZ static IRExpr* mkU128 ( ULong i )
446 //ZZ {
447 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
448 //ZZ }
449 //ZZ
450 //ZZ /* Generate a 4-aligned version of the given expression if
451 //ZZ the given condition is true. Else return it unchanged. */
452 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
453 //ZZ {
454 //ZZ if (b)
455 //ZZ return binop(Iop_And32, e, mkU32(~3));
456 //ZZ else
457 //ZZ return e;
458 //ZZ }
460 /* Other IR construction helpers. */
461 static IROp mkAND ( IRType ty ) {
462 switch (ty) {
463 case Ity_I32: return Iop_And32;
464 case Ity_I64: return Iop_And64;
465 default: vpanic("mkAND");
469 static IROp mkOR ( IRType ty ) {
470 switch (ty) {
471 case Ity_I32: return Iop_Or32;
472 case Ity_I64: return Iop_Or64;
473 default: vpanic("mkOR");
477 static IROp mkXOR ( IRType ty ) {
478 switch (ty) {
479 case Ity_I32: return Iop_Xor32;
480 case Ity_I64: return Iop_Xor64;
481 default: vpanic("mkXOR");
485 static IROp mkSHL ( IRType ty ) {
486 switch (ty) {
487 case Ity_I32: return Iop_Shl32;
488 case Ity_I64: return Iop_Shl64;
489 default: vpanic("mkSHL");
493 static IROp mkSHR ( IRType ty ) {
494 switch (ty) {
495 case Ity_I32: return Iop_Shr32;
496 case Ity_I64: return Iop_Shr64;
497 default: vpanic("mkSHR");
501 static IROp mkSAR ( IRType ty ) {
502 switch (ty) {
503 case Ity_I32: return Iop_Sar32;
504 case Ity_I64: return Iop_Sar64;
505 default: vpanic("mkSAR");
509 static IROp mkNOT ( IRType ty ) {
510 switch (ty) {
511 case Ity_I32: return Iop_Not32;
512 case Ity_I64: return Iop_Not64;
513 default: vpanic("mkNOT");
517 static IROp mkADD ( IRType ty ) {
518 switch (ty) {
519 case Ity_I32: return Iop_Add32;
520 case Ity_I64: return Iop_Add64;
521 default: vpanic("mkADD");
525 static IROp mkSUB ( IRType ty ) {
526 switch (ty) {
527 case Ity_I32: return Iop_Sub32;
528 case Ity_I64: return Iop_Sub64;
529 default: vpanic("mkSUB");
533 static IROp mkADDF ( IRType ty ) {
534 switch (ty) {
535 case Ity_F32: return Iop_AddF32;
536 case Ity_F64: return Iop_AddF64;
537 default: vpanic("mkADDF");
541 static IROp mkFMADDF ( IRType ty ) {
542 switch (ty) {
543 case Ity_F32: return Iop_MAddF32;
544 case Ity_F64: return Iop_MAddF64;
545 default: vpanic("mkFMADDF");
549 static IROp mkFMSUBF ( IRType ty ) {
550 switch (ty) {
551 case Ity_F32: return Iop_MSubF32;
552 case Ity_F64: return Iop_MSubF64;
553 default: vpanic("mkFMSUBF");
557 static IROp mkSUBF ( IRType ty ) {
558 switch (ty) {
559 case Ity_F32: return Iop_SubF32;
560 case Ity_F64: return Iop_SubF64;
561 default: vpanic("mkSUBF");
565 static IROp mkMULF ( IRType ty ) {
566 switch (ty) {
567 case Ity_F32: return Iop_MulF32;
568 case Ity_F64: return Iop_MulF64;
569 default: vpanic("mkMULF");
573 static IROp mkDIVF ( IRType ty ) {
574 switch (ty) {
575 case Ity_F32: return Iop_DivF32;
576 case Ity_F64: return Iop_DivF64;
577 default: vpanic("mkDIVF");
581 static IROp mkNEGF ( IRType ty ) {
582 switch (ty) {
583 case Ity_F32: return Iop_NegF32;
584 case Ity_F64: return Iop_NegF64;
585 default: vpanic("mkNEGF");
589 static IROp mkABSF ( IRType ty ) {
590 switch (ty) {
591 case Ity_F32: return Iop_AbsF32;
592 case Ity_F64: return Iop_AbsF64;
593 default: vpanic("mkABSF");
597 static IROp mkSQRTF ( IRType ty ) {
598 switch (ty) {
599 case Ity_F32: return Iop_SqrtF32;
600 case Ity_F64: return Iop_SqrtF64;
601 default: vpanic("mkSQRTF");
605 static IROp mkVecADD ( UInt size ) {
606 const IROp ops[4]
607 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
608 vassert(size < 4);
609 return ops[size];
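/* In this and the other mkVec* helpers below, 'size' follows the usual
   AArch64 SIMD encoding: 0 means 8-bit lanes, 1 means 16-bit, 2 means
   32-bit and 3 means 64-bit.  So, for example, mkVecADD(2) == Iop_Add32x4
   and mkVecADD(3) == Iop_Add64x2. */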
612 static IROp mkVecQADDU ( UInt size ) {
613 const IROp ops[4]
614 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
615 vassert(size < 4);
616 return ops[size];
619 static IROp mkVecQADDS ( UInt size ) {
620 const IROp ops[4]
621 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
622 vassert(size < 4);
623 return ops[size];
626 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
627 const IROp ops[4]
628 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
629 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
630 vassert(size < 4);
631 return ops[size];
634 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
635 const IROp ops[4]
636 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
637 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
638 vassert(size < 4);
639 return ops[size];
642 static IROp mkVecSUB ( UInt size ) {
643 const IROp ops[4]
644 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
645 vassert(size < 4);
646 return ops[size];
649 static IROp mkVecQSUBU ( UInt size ) {
650 const IROp ops[4]
651 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
652 vassert(size < 4);
653 return ops[size];
656 static IROp mkVecQSUBS ( UInt size ) {
657 const IROp ops[4]
658 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
659 vassert(size < 4);
660 return ops[size];
663 static IROp mkVecSARN ( UInt size ) {
664 const IROp ops[4]
665 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
666 vassert(size < 4);
667 return ops[size];
670 static IROp mkVecSHRN ( UInt size ) {
671 const IROp ops[4]
672 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
673 vassert(size < 4);
674 return ops[size];
677 static IROp mkVecSHLN ( UInt size ) {
678 const IROp ops[4]
679 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
680 vassert(size < 4);
681 return ops[size];
684 static IROp mkVecCATEVENLANES ( UInt size ) {
685 const IROp ops[4]
686 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
687 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
688 vassert(size < 4);
689 return ops[size];
692 static IROp mkVecCATODDLANES ( UInt size ) {
693 const IROp ops[4]
694 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
695 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
696 vassert(size < 4);
697 return ops[size];
700 static IROp mkVecINTERLEAVELO ( UInt size ) {
701 const IROp ops[4]
702 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
703 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
704 vassert(size < 4);
705 return ops[size];
708 static IROp mkVecINTERLEAVEHI ( UInt size ) {
709 const IROp ops[4]
710 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
711 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
712 vassert(size < 4);
713 return ops[size];
716 static IROp mkVecMAXU ( UInt size ) {
717 const IROp ops[4]
718 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
719 vassert(size < 4);
720 return ops[size];
723 static IROp mkVecMAXS ( UInt size ) {
724 const IROp ops[4]
725 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
726 vassert(size < 4);
727 return ops[size];
730 static IROp mkVecMINU ( UInt size ) {
731 const IROp ops[4]
732 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
733 vassert(size < 4);
734 return ops[size];
737 static IROp mkVecMINS ( UInt size ) {
738 const IROp ops[4]
739 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
740 vassert(size < 4);
741 return ops[size];
744 static IROp mkVecMUL ( UInt size ) {
745 const IROp ops[4]
746 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
747 vassert(size < 3);
748 return ops[size];
751 static IROp mkVecMULLU ( UInt sizeNarrow ) {
752 const IROp ops[4]
753 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
754 vassert(sizeNarrow < 3);
755 return ops[sizeNarrow];
758 static IROp mkVecMULLS ( UInt sizeNarrow ) {
759 const IROp ops[4]
760 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
761 vassert(sizeNarrow < 3);
762 return ops[sizeNarrow];
765 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
766 const IROp ops[4]
767 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
768 vassert(sizeNarrow < 3);
769 return ops[sizeNarrow];
772 static IROp mkVecCMPEQ ( UInt size ) {
773 const IROp ops[4]
774 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
775 vassert(size < 4);
776 return ops[size];
779 static IROp mkVecCMPGTU ( UInt size ) {
780 const IROp ops[4]
781 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
782 vassert(size < 4);
783 return ops[size];
786 static IROp mkVecCMPGTS ( UInt size ) {
787 const IROp ops[4]
788 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
789 vassert(size < 4);
790 return ops[size];
793 static IROp mkVecABS ( UInt size ) {
794 const IROp ops[4]
795 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
796 vassert(size < 4);
797 return ops[size];
800 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
801 const IROp ops[4]
802 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
803 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
804 vassert(size < 4);
805 return ops[size];
808 static IRExpr* mkU ( IRType ty, ULong imm ) {
809 switch (ty) {
810 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
811 case Ity_I64: return mkU64(imm);
812 default: vpanic("mkU");
816 static IROp mkVecQDMULHIS ( UInt size ) {
817 const IROp ops[4]
818 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
819 vassert(size < 4);
820 return ops[size];
823 static IROp mkVecQRDMULHIS ( UInt size ) {
824 const IROp ops[4]
825 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
826 vassert(size < 4);
827 return ops[size];
830 static IROp mkVecQANDUQSH ( UInt size ) {
831 const IROp ops[4]
832 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
833 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
834 vassert(size < 4);
835 return ops[size];
838 static IROp mkVecQANDSQSH ( UInt size ) {
839 const IROp ops[4]
840 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
841 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
842 vassert(size < 4);
843 return ops[size];
846 static IROp mkVecQANDUQRSH ( UInt size ) {
847 const IROp ops[4]
848 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
849 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
850 vassert(size < 4);
851 return ops[size];
854 static IROp mkVecQANDSQRSH ( UInt size ) {
855 const IROp ops[4]
856 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
857 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
858 vassert(size < 4);
859 return ops[size];
862 static IROp mkVecSHU ( UInt size ) {
863 const IROp ops[4]
864 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
865 vassert(size < 4);
866 return ops[size];
869 static IROp mkVecSHS ( UInt size ) {
870 const IROp ops[4]
871 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
872 vassert(size < 4);
873 return ops[size];
876 static IROp mkVecRSHU ( UInt size ) {
877 const IROp ops[4]
878 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
879 vassert(size < 4);
880 return ops[size];
883 static IROp mkVecRSHS ( UInt size ) {
884 const IROp ops[4]
885 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
886 vassert(size < 4);
887 return ops[size];
890 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
891 const IROp ops[4]
892 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
893 Iop_NarrowUn64to32x2, Iop_INVALID };
894 vassert(sizeNarrow < 4);
895 return ops[sizeNarrow];
898 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
899 const IROp ops[4]
900 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
901 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
902 vassert(sizeNarrow < 4);
903 return ops[sizeNarrow];
906 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
907 const IROp ops[4]
908 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
909 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
910 vassert(sizeNarrow < 4);
911 return ops[sizeNarrow];
914 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
915 const IROp ops[4]
916 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
917 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
918 vassert(sizeNarrow < 4);
919 return ops[sizeNarrow];
922 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
923 const IROp ops[4]
924 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
925 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
926 vassert(sizeNarrow < 4);
927 return ops[sizeNarrow];
930 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
931 const IROp ops[4]
932 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
933 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
934 vassert(sizeNarrow < 4);
935 return ops[sizeNarrow];
938 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
939 const IROp ops[4]
940 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
941 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
942 vassert(sizeNarrow < 4);
943 return ops[sizeNarrow];
946 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
947 const IROp ops[4]
948 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
949 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
950 vassert(sizeNarrow < 4);
951 return ops[sizeNarrow];
954 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
955 const IROp ops[4]
956 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
957 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
958 vassert(sizeNarrow < 4);
959 return ops[sizeNarrow];
962 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
963 const IROp ops[4]
964 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
965 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
966 vassert(sizeNarrow < 4);
967 return ops[sizeNarrow];
970 static IROp mkVecQSHLNSATUU ( UInt size ) {
971 const IROp ops[4]
972 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
973 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
974 vassert(size < 4);
975 return ops[size];
978 static IROp mkVecQSHLNSATSS ( UInt size ) {
979 const IROp ops[4]
980 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
981 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
982 vassert(size < 4);
983 return ops[size];
986 static IROp mkVecQSHLNSATSU ( UInt size ) {
987 const IROp ops[4]
988 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
989 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
990 vassert(size < 4);
991 return ops[size];
994 static IROp mkVecADDF ( UInt size ) {
995 const IROp ops[4]
996 = { Iop_INVALID, Iop_Add16Fx8, Iop_Add32Fx4, Iop_Add64Fx2 };
997 vassert(size < 4);
998 return ops[size];
1001 static IROp mkVecMAXF ( UInt size ) {
1002 const IROp ops[4]
1003 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
1004 vassert(size < 4);
1005 return ops[size];
1008 static IROp mkVecMINF ( UInt size ) {
1009 const IROp ops[4]
1010 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
1011 vassert(size < 4);
1012 return ops[size];
1015 /* Generate IR to create 'arg rotated right by imm', for sane values
1016 of 'ty' and 'imm'. */
1017 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
1019 UInt w = 0;
1020 if (ty == Ity_I64) {
1021 w = 64;
1022 } else {
1023 vassert(ty == Ity_I32);
1024 w = 32;
1026 vassert(w != 0);
1027 vassert(imm < w);
1028 if (imm == 0) {
1029 return arg;
1031 IRTemp res = newTemp(ty);
1032 assign(res, binop(mkOR(ty),
1033 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1034 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1035 return res;
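/* For example, with ty == Ity_I32, imm == 4 and an 'arg' value of
   0x000000F0, the generated IR computes
   (0xF0 << 28) | (0xF0 >> 4) == 0x0000000F. */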
1038 /* Generate IR to set the returned temp to either all-zeroes or
1039 all ones, as a copy of arg<imm>. */
1040 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1042 UInt w = 0;
1043 if (ty == Ity_I64) {
1044 w = 64;
1045 } else {
1046 vassert(ty == Ity_I32);
1047 w = 32;
1049 vassert(w != 0);
1050 vassert(imm < w);
1051 IRTemp res = newTemp(ty);
1052 assign(res, binop(mkSAR(ty),
1053 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1054 mkU8(w - 1)));
1055 return res;
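/* For example, with ty == Ity_I32 and imm == 3, an 'arg' value of
   0x00000008 (bit 3 set) produces 0xFFFFFFFF, whereas any value with
   bit 3 clear produces 0x00000000. */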
1058 /* S-widen 8/16/32/64 bit int expr to 64. */
1059 static IRExpr* widenSto64 ( IRType srcTy, IRExpr* e )
1061 switch (srcTy) {
1062 case Ity_I64: return e;
1063 case Ity_I32: return unop(Iop_32Sto64, e);
1064 case Ity_I16: return unop(Iop_16Sto64, e);
1065 case Ity_I8: return unop(Iop_8Sto64, e);
1066 default: vpanic("widenSto64(arm64)");
1070 /* U-widen 8/16/32/64 bit int expr to 64. */
1071 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1073 switch (srcTy) {
1074 case Ity_I64: return e;
1075 case Ity_I32: return unop(Iop_32Uto64, e);
1076 case Ity_I16: return unop(Iop_16Uto64, e);
1077 case Ity_I8: return unop(Iop_8Uto64, e);
1078 default: vpanic("widenUto64(arm64)");
1082 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1083 of these combinations make sense. */
1084 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1086 switch (dstTy) {
1087 case Ity_I64: return e;
1088 case Ity_I32: return unop(Iop_64to32, e);
1089 case Ity_I16: return unop(Iop_64to16, e);
1090 case Ity_I8: return unop(Iop_64to8, e);
1091 default: vpanic("narrowFrom64(arm64)");
1096 /*------------------------------------------------------------*/
1097 /*--- Helpers for accessing guest registers. ---*/
1098 /*------------------------------------------------------------*/
1100 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1101 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1102 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1103 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1104 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1105 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1106 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1107 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1108 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1109 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1110 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1111 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1112 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1113 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1114 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1115 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1116 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1117 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1118 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1119 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1120 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1121 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1122 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1123 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1124 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1125 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1126 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1127 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1128 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1129 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1130 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1132 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1133 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1135 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1136 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1137 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1138 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1140 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1141 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1143 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1144 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1145 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1146 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1147 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1148 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1149 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1150 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1151 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1152 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1153 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1154 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1155 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1156 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1157 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1158 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1159 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1160 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1161 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1162 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1163 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1164 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1165 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1166 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1167 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1168 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1169 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1170 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1171 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1172 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1173 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1174 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1176 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1177 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1179 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1180 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1182 #define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
1183 #define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
1184 #define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
1187 /* ---------------- Integer registers ---------------- */
1189 static Int offsetIReg64 ( UInt iregNo )
1191 /* Do we care about endianness here? We do if sub-parts of integer
1192 registers are accessed. */
1193 switch (iregNo) {
1194 case 0: return OFFB_X0;
1195 case 1: return OFFB_X1;
1196 case 2: return OFFB_X2;
1197 case 3: return OFFB_X3;
1198 case 4: return OFFB_X4;
1199 case 5: return OFFB_X5;
1200 case 6: return OFFB_X6;
1201 case 7: return OFFB_X7;
1202 case 8: return OFFB_X8;
1203 case 9: return OFFB_X9;
1204 case 10: return OFFB_X10;
1205 case 11: return OFFB_X11;
1206 case 12: return OFFB_X12;
1207 case 13: return OFFB_X13;
1208 case 14: return OFFB_X14;
1209 case 15: return OFFB_X15;
1210 case 16: return OFFB_X16;
1211 case 17: return OFFB_X17;
1212 case 18: return OFFB_X18;
1213 case 19: return OFFB_X19;
1214 case 20: return OFFB_X20;
1215 case 21: return OFFB_X21;
1216 case 22: return OFFB_X22;
1217 case 23: return OFFB_X23;
1218 case 24: return OFFB_X24;
1219 case 25: return OFFB_X25;
1220 case 26: return OFFB_X26;
1221 case 27: return OFFB_X27;
1222 case 28: return OFFB_X28;
1223 case 29: return OFFB_X29;
1224 case 30: return OFFB_X30;
1225 /* but not 31 */
1226 default: vassert(0);
1230 static Int offsetIReg64orSP ( UInt iregNo )
1232 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1235 static const HChar* nameIReg64orZR ( UInt iregNo )
1237 vassert(iregNo < 32);
1238 static const HChar* names[32]
1239 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1240 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1241 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1242 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1243 return names[iregNo];
1246 static const HChar* nameIReg64orSP ( UInt iregNo )
1248 if (iregNo == 31) {
1249 return "sp";
1251 vassert(iregNo < 31);
1252 return nameIReg64orZR(iregNo);
1255 static IRExpr* getIReg64orSP ( UInt iregNo )
1257 vassert(iregNo < 32);
1258 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1261 static IRExpr* getIReg64orZR ( UInt iregNo )
1263 if (iregNo == 31) {
1264 return mkU64(0);
1266 vassert(iregNo < 31);
1267 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1270 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1272 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1273 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1276 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1278 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1279 if (iregNo == 31) {
1280 return;
1282 vassert(iregNo < 31);
1283 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1286 static const HChar* nameIReg32orZR ( UInt iregNo )
1288 vassert(iregNo < 32);
1289 static const HChar* names[32]
1290 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1291 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1292 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1293 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1294 return names[iregNo];
1297 static const HChar* nameIReg32orSP ( UInt iregNo )
1299 if (iregNo == 31) {
1300 return "wsp";
1302 vassert(iregNo < 31);
1303 return nameIReg32orZR(iregNo);
1306 static IRExpr* getIReg32orSP ( UInt iregNo )
1308 vassert(iregNo < 32);
1309 return unop(Iop_64to32,
1310 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1313 static IRExpr* getIReg32orZR ( UInt iregNo )
1315 if (iregNo == 31) {
1316 return mkU32(0);
1318 vassert(iregNo < 31);
1319 return unop(Iop_64to32,
1320 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1323 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1325 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1326 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1329 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1331 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1332 if (iregNo == 31) {
1333 return;
1335 vassert(iregNo < 31);
1336 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1339 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1341 vassert(is64 == True || is64 == False);
1342 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1345 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1347 vassert(is64 == True || is64 == False);
1348 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1351 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1353 vassert(is64 == True || is64 == False);
1354 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1357 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1359 vassert(is64 == True || is64 == False);
1360 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1363 static void putPC ( IRExpr* e )
1365 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1366 stmt( IRStmt_Put(OFFB_PC, e) );
1370 /* ---------------- Vector (Q) registers ---------------- */
1372 static Int offsetQReg128 ( UInt qregNo )
1374 /* We don't care about endianness at this point. It only becomes
1375 relevant when dealing with sections of these registers. */
1376 switch (qregNo) {
1377 case 0: return OFFB_Q0;
1378 case 1: return OFFB_Q1;
1379 case 2: return OFFB_Q2;
1380 case 3: return OFFB_Q3;
1381 case 4: return OFFB_Q4;
1382 case 5: return OFFB_Q5;
1383 case 6: return OFFB_Q6;
1384 case 7: return OFFB_Q7;
1385 case 8: return OFFB_Q8;
1386 case 9: return OFFB_Q9;
1387 case 10: return OFFB_Q10;
1388 case 11: return OFFB_Q11;
1389 case 12: return OFFB_Q12;
1390 case 13: return OFFB_Q13;
1391 case 14: return OFFB_Q14;
1392 case 15: return OFFB_Q15;
1393 case 16: return OFFB_Q16;
1394 case 17: return OFFB_Q17;
1395 case 18: return OFFB_Q18;
1396 case 19: return OFFB_Q19;
1397 case 20: return OFFB_Q20;
1398 case 21: return OFFB_Q21;
1399 case 22: return OFFB_Q22;
1400 case 23: return OFFB_Q23;
1401 case 24: return OFFB_Q24;
1402 case 25: return OFFB_Q25;
1403 case 26: return OFFB_Q26;
1404 case 27: return OFFB_Q27;
1405 case 28: return OFFB_Q28;
1406 case 29: return OFFB_Q29;
1407 case 30: return OFFB_Q30;
1408 case 31: return OFFB_Q31;
1409 default: vassert(0);
1413 /* Write to a complete Qreg. */
1414 static void putQReg128 ( UInt qregNo, IRExpr* e )
1416 vassert(qregNo < 32);
1417 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1418 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1421 /* Read a complete Qreg. */
1422 static IRExpr* getQReg128 ( UInt qregNo )
1424 vassert(qregNo < 32);
1425 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1428 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1429 bit sub-parts we can choose either integer or float types, and
1430 choose float on the basis that that is the common use case and so
1431 will give least interference with Put-to-Get forwarding later
1432 on. */
1433 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1435 switch (szB) {
1436 case 1: return Ity_I8;
1437 case 2: return Ity_I16;
1438 case 4: return Ity_I32; //Ity_F32;
1439 case 8: return Ity_F64;
1440 case 16: return Ity_V128;
1441 default: vassert(0);
1445 /* Find the offset of the laneNo'th lane of type laneTy in the given
1446 Qreg. Since the host is little-endian, the least significant lane
1447 has the lowest offset. */
1448 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1450 vassert(host_endness == VexEndnessLE);
1451 Int base = offsetQReg128(qregNo);
1452 /* Since the host is little-endian, the least significant lane
1453 will be at the lowest address. */
1454 /* Restrict this to known types, so as to avoid silently accepting
1455 stupid types. */
1456 UInt laneSzB = 0;
1457 switch (laneTy) {
1458 case Ity_I8: laneSzB = 1; break;
1459 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1460 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1461 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1462 case Ity_V128: laneSzB = 16; break;
1463 default: break;
1465 vassert(laneSzB > 0);
1466 UInt minOff = laneNo * laneSzB;
1467 UInt maxOff = minOff + laneSzB - 1;
1468 vassert(maxOff < 16);
1469 return base + minOff;
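/* For example, lane 2 of type Ity_I32 in Qreg 5 lives at
   offsetQReg128(5) + 8, occupying bytes 8..11 of that register's
   guest-state slot. */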
1472 /* Put to the least significant lane of a Qreg. */
1473 static void putQRegLO ( UInt qregNo, IRExpr* e )
1475 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1476 Int off = offsetQRegLane(qregNo, ty, 0);
1477 switch (ty) {
1478 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1479 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1480 break;
1481 default:
1482 vassert(0); // Other cases are probably invalid
1484 stmt(IRStmt_Put(off, e));
1487 /* Get from the least significant lane of a Qreg. */
1488 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1490 Int off = offsetQRegLane(qregNo, ty, 0);
1491 switch (ty) {
1492 case Ity_I8:
1493 case Ity_F16: case Ity_I16:
1494 case Ity_I32: case Ity_I64:
1495 case Ity_F32: case Ity_F64: case Ity_V128:
1496 break;
1497 default:
1498 vassert(0); // Other cases are ATC
1500 return IRExpr_Get(off, ty);
1503 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1505 static const HChar* namesQ[32]
1506 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1507 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1508 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1509 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1510 static const HChar* namesD[32]
1511 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1512 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1513 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1514 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1515 static const HChar* namesS[32]
1516 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1517 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1518 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1519 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1520 static const HChar* namesH[32]
1521 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1522 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1523 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1524 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1525 static const HChar* namesB[32]
1526 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1527 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1528 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1529 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1530 vassert(qregNo < 32);
1531 switch (sizeofIRType(laneTy)) {
1532 case 1: return namesB[qregNo];
1533 case 2: return namesH[qregNo];
1534 case 4: return namesS[qregNo];
1535 case 8: return namesD[qregNo];
1536 case 16: return namesQ[qregNo];
1537 default: vassert(0);
1539 /*NOTREACHED*/
1542 static const HChar* nameQReg128 ( UInt qregNo )
1544 return nameQRegLO(qregNo, Ity_V128);
1547 /* Find the offset of the most significant half (8 bytes) of the given
1548 Qreg. This requires knowing the endianness of the host. */
1549 static Int offsetQRegHI64 ( UInt qregNo )
1551 return offsetQRegLane(qregNo, Ity_I64, 1);
1554 static IRExpr* getQRegHI64 ( UInt qregNo )
1556 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1559 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1561 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1562 Int off = offsetQRegHI64(qregNo);
1563 switch (ty) {
1564 case Ity_I64: case Ity_F64:
1565 break;
1566 default:
1567 vassert(0); // Other cases are plain wrong
1569 stmt(IRStmt_Put(off, e));
1572 /* Put to a specified lane of a Qreg. */
1573 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1575 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1576 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1577 switch (laneTy) {
1578 case Ity_F64: case Ity_I64:
1579 case Ity_I32: case Ity_F32:
1580 case Ity_I16: case Ity_F16:
1581 case Ity_I8:
1582 break;
1583 default:
1584 vassert(0); // Other cases are ATC
1586 stmt(IRStmt_Put(off, e));
1589 /* Get from a specified lane of a Qreg. */
1590 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1592 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1593 switch (laneTy) {
1594 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1595 case Ity_F64: case Ity_F32: case Ity_F16:
1596 break;
1597 default:
1598 vassert(0); // Other cases are ATC
1600 return IRExpr_Get(off, laneTy);
1604 //ZZ /* ---------------- Misc registers ---------------- */
1605 //ZZ
1606 //ZZ static void putMiscReg32 ( UInt gsoffset,
1607 //ZZ IRExpr* e, /* :: Ity_I32 */
1608 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1609 //ZZ {
1610 //ZZ switch (gsoffset) {
1611 //ZZ case OFFB_FPSCR: break;
1612 //ZZ case OFFB_QFLAG32: break;
1613 //ZZ case OFFB_GEFLAG0: break;
1614 //ZZ case OFFB_GEFLAG1: break;
1615 //ZZ case OFFB_GEFLAG2: break;
1616 //ZZ case OFFB_GEFLAG3: break;
1617 //ZZ default: vassert(0); /* awaiting more cases */
1618 //ZZ }
1619 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1620 //ZZ
1621 //ZZ if (guardT == IRTemp_INVALID) {
1622 //ZZ /* unconditional write */
1623 //ZZ stmt(IRStmt_Put(gsoffset, e));
1624 //ZZ } else {
1625 //ZZ stmt(IRStmt_Put(
1626 //ZZ gsoffset,
1627 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1628 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1629 //ZZ ));
1630 //ZZ }
1631 //ZZ }
1632 //ZZ
1633 //ZZ static IRTemp get_ITSTATE ( void )
1634 //ZZ {
1635 //ZZ ASSERT_IS_THUMB;
1636 //ZZ IRTemp t = newTemp(Ity_I32);
1637 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1638 //ZZ return t;
1639 //ZZ }
1640 //ZZ
1641 //ZZ static void put_ITSTATE ( IRTemp t )
1642 //ZZ {
1643 //ZZ ASSERT_IS_THUMB;
1644 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1645 //ZZ }
1646 //ZZ
1647 //ZZ static IRTemp get_QFLAG32 ( void )
1648 //ZZ {
1649 //ZZ IRTemp t = newTemp(Ity_I32);
1650 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1651 //ZZ return t;
1652 //ZZ }
1653 //ZZ
1654 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1655 //ZZ {
1656 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1657 //ZZ }
1658 //ZZ
1659 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1660 //ZZ Status Register) to indicate that overflow or saturation occurred.
1661 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1662 //ZZ value to indicate saturation. */
1663 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1664 //ZZ {
1665 //ZZ IRTemp old = get_QFLAG32();
1666 //ZZ IRTemp nyu = newTemp(Ity_I32);
1667 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1668 //ZZ put_QFLAG32(nyu, condT);
1669 //ZZ }
1672 /* ---------------- FPCR stuff ---------------- */
1674 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1675 convert them to IR format. Bind the final result to the
1676 returned temp. */
1677 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1679 /* The ARMvfp encoding for rounding mode bits is:
1680 00 to nearest
1681 01 to +infinity
1682 10 to -infinity
1683 11 to zero
1684 We need to convert that to the IR encoding:
1685 00 to nearest (the default)
1686 10 to +infinity
1687 01 to -infinity
1688 11 to zero
1689 Which can be done by swapping bits 0 and 1.
1690 The rmode bits are at 23:22 in FPCR.
1692 IRTemp armEncd = newTemp(Ity_I32);
1693 IRTemp swapped = newTemp(Ity_I32);
1694 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1695 we don't zero out bits 24 and above, since the assignment to
1696 'swapped' will mask them out anyway. */
1697 assign(armEncd,
1698 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1699 /* Now swap them. */
1700 assign(swapped,
1701 binop(Iop_Or32,
1702 binop(Iop_And32,
1703 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1704 mkU32(2)),
1705 binop(Iop_And32,
1706 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1707 mkU32(1))
1709 return swapped;
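/* In plain C terms the swap above is (shown for clarity only):
      UInt ir = ((arm << 1) & 2) | ((arm >> 1) & 1);
   so ARM 01 (to +infinity) becomes IR 10 (Irrm_PosINF), ARM 10
   (to -infinity) becomes IR 01 (Irrm_NegINF), and 00/11 are unchanged. */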
1713 /*------------------------------------------------------------*/
1714 /*--- Helpers for flag handling and conditional insns ---*/
1715 /*------------------------------------------------------------*/
1717 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1719 switch (cond) {
1720 case ARM64CondEQ: return "eq";
1721 case ARM64CondNE: return "ne";
1722 case ARM64CondCS: return "cs"; // or 'hs'
1723 case ARM64CondCC: return "cc"; // or 'lo'
1724 case ARM64CondMI: return "mi";
1725 case ARM64CondPL: return "pl";
1726 case ARM64CondVS: return "vs";
1727 case ARM64CondVC: return "vc";
1728 case ARM64CondHI: return "hi";
1729 case ARM64CondLS: return "ls";
1730 case ARM64CondGE: return "ge";
1731 case ARM64CondLT: return "lt";
1732 case ARM64CondGT: return "gt";
1733 case ARM64CondLE: return "le";
1734 case ARM64CondAL: return "al";
1735 case ARM64CondNV: return "nv";
1736 default: vpanic("name_ARM64Condcode");
1740 /* and a handy shorthand for it */
1741 static const HChar* nameCC ( ARM64Condcode cond ) {
1742 return nameARM64Condcode(cond);
1746 /* Build IR to calculate some particular condition from stored
1747 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1748 Ity_I64, suitable for narrowing. Although the return type is
1749 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1750 :: Ity_I64 and must denote the condition to compute in
1751 bits 7:4, and be zero everywhere else.
1753 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1755 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1756 /* And 'cond' had better produce a value in which only bits 7:4 are
1757 nonzero. However, obviously we can't assert for that. */
1759 /* So what we're constructing for the first argument is
1760 "(cond << 4) | stored-operation".
1761 However, as per comments above, 'cond' must be supplied
1762 pre-shifted to this function.
1764 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1765 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1766 8 bits of the first argument. */
1767 IRExpr** args
1768 = mkIRExprVec_4(
1769 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1770 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1771 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1772 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1774 IRExpr* call
1775 = mkIRExprCCall(
1776 Ity_I64,
1777 0/*regparm*/,
1778 "arm64g_calculate_condition", &arm64g_calculate_condition,
1779 args
1782 /* Exclude the requested condition, OP and NDEP from definedness
1783 checking. We're only interested in DEP1 and DEP2. */
1784 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1785 return call;
1789 /* Build IR to calculate some particular condition from stored
1790 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1791 Ity_I64, suitable for narrowing. Although the return type is
1792 Ity_I64, the returned value is either 0 or 1.
1794 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1796 /* First arg is "(cond << 4) | stored-operation". This requires that the
1797 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1798 (COND, OP) pair in the lowest 8 bits of the first argument. */
1799 vassert(cond >= 0 && cond <= 15);
1800 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
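/* For example, a request for ARM64CondNE (encoded as 1) passes (1 << 4)
   to the dynamic variant above, which ORs in the stored CC_OP, so the
   helper receives a (COND, OP) pair packed into the low 8 bits of its
   first argument. */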
1804 /* Build IR to calculate just the carry flag from stored
1805 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1806 Ity_I64. */
1807 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1809 IRExpr** args
1810 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1811 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1812 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1813 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1814 IRExpr* call
1815 = mkIRExprCCall(
1816 Ity_I64,
1817 0/*regparm*/,
1818 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1819 args
1821 /* Exclude OP and NDEP from definedness checking. We're only
1822 interested in DEP1 and DEP2. */
1823 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1824 return call;
1828 //ZZ /* Build IR to calculate just the overflow flag from stored
1829 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1830 //ZZ Ity_I32. */
1831 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1832 //ZZ {
1833 //ZZ IRExpr** args
1834 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1835 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1836 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1837 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1838 //ZZ IRExpr* call
1839 //ZZ = mkIRExprCCall(
1840 //ZZ Ity_I32,
1841 //ZZ 0/*regparm*/,
1842 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1843 //ZZ args
1844 //ZZ );
1845 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1846 //ZZ interested in DEP1 and DEP2. */
1847 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1848 //ZZ return call;
1849 //ZZ }
1852 /* Build IR to calculate N Z C V in bits 31:28 of the
1853 returned word. */
1854 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1856 IRExpr** args
1857 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1858 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1859 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1860 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1861 IRExpr* call
1862 = mkIRExprCCall(
1863 Ity_I64,
1864 0/*regparm*/,
1865 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1866 args
1868 /* Exclude OP and NDEP from definedness checking. We're only
1869 interested in DEP1 and DEP2. */
1870 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1871 return call;
1875 /* Build IR to set the flags thunk, in the most general case. */
1876 static
1877 void setFlags_D1_D2_ND ( UInt cc_op,
1878 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1880 vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1881 vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1882 vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1883 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1884 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1885 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1886 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1887 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1890 /* Build IR to set the flags thunk after ADD or SUB. */
1891 static
1892 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1894 IRTemp argL64 = IRTemp_INVALID;
1895 IRTemp argR64 = IRTemp_INVALID;
1896 IRTemp z64 = newTemp(Ity_I64);
1897 if (is64) {
1898 argL64 = argL;
1899 argR64 = argR;
1900 } else {
1901 argL64 = newTemp(Ity_I64);
1902 argR64 = newTemp(Ity_I64);
1903 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1904 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1906 assign(z64, mkU64(0));
1907 UInt cc_op = ARM64G_CC_OP_NUMBER;
1908 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1909 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1910 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1911 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1912 else { vassert(0); }
1913 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
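/* Usage sketch (illustrative): a decoder handling, say, a 64-bit ADDS
   follows the pattern

      assign(argL, getIReg64orSP(nn));
      assign(argR, mkU64(uimm12));
      putIReg64orZR(dd, binop(Iop_Add64, mkexpr(argL), mkexpr(argR)));
      setFlags_ADD_SUB(True/*is64*/, False/*!isSUB*/, argL, argR);

   exactly as the imm12 case further down does. */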
1916 /* Build IR to set the flags thunk after ADC or SBC. */
1917 static
1918 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1919 IRTemp argL, IRTemp argR, IRTemp oldC )
1921 IRTemp argL64 = IRTemp_INVALID;
1922 IRTemp argR64 = IRTemp_INVALID;
1923 IRTemp oldC64 = IRTemp_INVALID;
1924 if (is64) {
1925 argL64 = argL;
1926 argR64 = argR;
1927 oldC64 = oldC;
1928 } else {
1929 argL64 = newTemp(Ity_I64);
1930 argR64 = newTemp(Ity_I64);
1931 oldC64 = newTemp(Ity_I64);
1932 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1933 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1934 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1936 UInt cc_op = ARM64G_CC_OP_NUMBER;
1937 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1938 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1939 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1940 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1941 else { vassert(0); }
1942 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1945 /* Build IR to set the flags thunk after ADD or SUB, if the given
1946 condition evaluates to True at run time. If not, the flags are set
1947 to the specified NZCV value. */
1948 static
1949 void setFlags_ADD_SUB_conditionally (
1950 Bool is64, Bool isSUB,
1951 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1954 /* Generate IR as follows:
1955 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1956 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1957 CC_DEP2 = ITE(cond, argR64, 0)
1958 CC_NDEP = 0
1961 IRTemp z64 = newTemp(Ity_I64);
1962 assign(z64, mkU64(0));
1964 /* Establish the operation and operands for the True case. */
1965 IRTemp t_dep1 = IRTemp_INVALID;
1966 IRTemp t_dep2 = IRTemp_INVALID;
1967 UInt t_op = ARM64G_CC_OP_NUMBER;
1968 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1969 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1970 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1971 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1972 else { vassert(0); }
1973 /* */
1974 if (is64) {
1975 t_dep1 = argL;
1976 t_dep2 = argR;
1977 } else {
1978 t_dep1 = newTemp(Ity_I64);
1979 t_dep2 = newTemp(Ity_I64);
1980 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1981 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1984 /* Establish the operation and operands for the False case. */
1985 IRTemp f_dep1 = newTemp(Ity_I64);
1986 IRTemp f_dep2 = z64;
1987 UInt f_op = ARM64G_CC_OP_COPY;
1988 assign(f_dep1, mkU64(nzcv << 28));
1990 /* Final thunk values */
1991 IRTemp dep1 = newTemp(Ity_I64);
1992 IRTemp dep2 = newTemp(Ity_I64);
1993 IRTemp op = newTemp(Ity_I64);
1995 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1996 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1997 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1999 /* finally .. */
2000 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
2001 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
2002 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
2003 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
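/* Worked illustration: if |cond| evaluates to 0 at run time, the thunk
   degenerates to CC_OP = ARM64G_CC_OP_COPY with CC_DEP1 = nzcv << 28,
   which is the same state setFlags_COPY below would establish -- the
   flags simply become the literal NZCV field from the instruction. */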
2006 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
2007 static
2008 void setFlags_LOGIC ( Bool is64, IRTemp res )
2010 IRTemp res64 = IRTemp_INVALID;
2011 IRTemp z64 = newTemp(Ity_I64);
2012 UInt cc_op = ARM64G_CC_OP_NUMBER;
2013 if (is64) {
2014 res64 = res;
2015 cc_op = ARM64G_CC_OP_LOGIC64;
2016 } else {
2017 res64 = newTemp(Ity_I64);
2018 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
2019 cc_op = ARM64G_CC_OP_LOGIC32;
2021 assign(z64, mkU64(0));
2022 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
2025 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
2026 located in bits 31:28 of the supplied value. */
2027 static
2028 void setFlags_COPY ( IRTemp nzcv_28x0 )
2030 IRTemp z64 = newTemp(Ity_I64);
2031 assign(z64, mkU64(0));
2032 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
2036 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2037 //ZZ sets it at all) */
2038 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2039 //ZZ IRTemp t_dep2,
2040 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2041 //ZZ {
2042 //ZZ IRTemp z32 = newTemp(Ity_I32);
2043 //ZZ assign( z32, mkU32(0) );
2044 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2045 //ZZ }
2046 //ZZ
2047 //ZZ
2048 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2049 //ZZ sets it at all) */
2050 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2051 //ZZ IRTemp t_ndep,
2052 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2053 //ZZ {
2054 //ZZ IRTemp z32 = newTemp(Ity_I32);
2055 //ZZ assign( z32, mkU32(0) );
2056 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2057 //ZZ }
2058 //ZZ
2059 //ZZ
2060 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2061 //ZZ sets them at all) */
2062 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2063 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2064 //ZZ {
2065 //ZZ IRTemp z32 = newTemp(Ity_I32);
2066 //ZZ assign( z32, mkU32(0) );
2067 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2068 //ZZ }
2071 /*------------------------------------------------------------*/
2072 /*--- Misc math helpers ---*/
2073 /*------------------------------------------------------------*/
2075 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2076 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2078 IRTemp maskT = newTemp(Ity_I64);
2079 IRTemp res = newTemp(Ity_I64);
2080 vassert(sh >= 1 && sh <= 63);
2081 assign(maskT, mkU64(mask));
2082 assign( res,
2083 binop(Iop_Or64,
2084 binop(Iop_Shr64,
2085 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2086 mkU8(sh)),
2087 binop(Iop_And64,
2088 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2089 mkexpr(maskT))
2092 return res;
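/* Worked example (illustrative): with mask 0xFF00FF00FF00FF00 and sh 8,
   x = 0x1122334455667788 gives
      (x & mask) >>u 8  =  0x0011003300550077
      (x << 8) & mask   =  0x2200440066008800
   whose OR is 0x2211443366558877 -- the two bytes in each 16-bit lane
   have been exchanged. */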
2095 /* Generates byte swaps within 32-bit lanes. */
2096 static IRTemp math_UINTSWAP64 ( IRTemp src )
2098 IRTemp res;
2099 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2100 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2101 return res;
2104 /* Generates byte swaps within 16-bit lanes. */
2105 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2107 IRTemp res;
2108 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2109 return res;
2112 /* Generates a 64-bit byte swap. */
2113 static IRTemp math_BYTESWAP64 ( IRTemp src )
2115 IRTemp res;
2116 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2117 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2118 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2119 return res;
2122 /* Generates a 64-bit bit swap. */
2123 static IRTemp math_BITSWAP64 ( IRTemp src )
2125 IRTemp res;
2126 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2127 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2128 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2129 return math_BYTESWAP64(res);
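/* Worked example (illustrative): for src = 0x1 the three SWAPHELPER
   passes give 0x2, then 0x8, then 0x80 (the set bit climbs to the top
   of its byte), and the final BYTESWAP64 moves that byte to the top of
   the word, giving 0x8000000000000000 -- the bit-reversed value. */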
2132 /* Duplicates the bits at the bottom of the given word to fill the
2133 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2134 except for the bottom bits. */
2135 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2137 if (srcTy == Ity_I8) {
2138 IRTemp t16 = newTemp(Ity_I64);
2139 assign(t16, binop(Iop_Or64, mkexpr(src),
2140 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2141 IRTemp t32 = newTemp(Ity_I64);
2142 assign(t32, binop(Iop_Or64, mkexpr(t16),
2143 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2144 IRTemp t64 = newTemp(Ity_I64);
2145 assign(t64, binop(Iop_Or64, mkexpr(t32),
2146 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2147 return t64;
2149 if (srcTy == Ity_I16) {
2150 IRTemp t32 = newTemp(Ity_I64);
2151 assign(t32, binop(Iop_Or64, mkexpr(src),
2152 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2153 IRTemp t64 = newTemp(Ity_I64);
2154 assign(t64, binop(Iop_Or64, mkexpr(t32),
2155 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2156 return t64;
2158 if (srcTy == Ity_I32) {
2159 IRTemp t64 = newTemp(Ity_I64);
2160 assign(t64, binop(Iop_Or64, mkexpr(src),
2161 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2162 return t64;
2164 if (srcTy == Ity_I64) {
2165 return src;
2167 vassert(0);
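/* Illustrative example: for srcTy == Ity_I8 and src = 0x5A the three
   or-with-shift steps produce 0x5A5A, then 0x5A5A5A5A, and finally
   0x5A5A5A5A5A5A5A5A.  This relies on the stated precondition that all
   bits above the bottom lane are already zero. */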
2171 /* Duplicates the src element exactly so as to fill a V128 value. */
2172 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2174 IRTemp res = newTempV128();
2175 if (srcTy == Ity_F64) {
2176 IRTemp i64 = newTemp(Ity_I64);
2177 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2178 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2179 return res;
2181 if (srcTy == Ity_F32) {
2182 IRTemp i64a = newTemp(Ity_I64);
2183 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2184 IRTemp i64b = newTemp(Ity_I64);
2185 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2186 mkexpr(i64a)));
2187 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2188 return res;
2190 if (srcTy == Ity_I64) {
2191 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2192 return res;
2194 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2195 IRTemp t1 = newTemp(Ity_I64);
2196 assign(t1, widenUto64(srcTy, mkexpr(src)));
2197 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2198 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2199 return res;
2201 vassert(0);
2205 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2206 zero out the upper half. */
2207 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2209 if (bitQ == 1) return mkexpr(fullWidth);
2210 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2211 vassert(0);
2214 /* The same, but from an expression instead. */
2215 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2217 IRTemp fullWidthT = newTempV128();
2218 assign(fullWidthT, fullWidth);
2219 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2223 /*------------------------------------------------------------*/
2224 /*--- FP comparison helpers ---*/
2225 /*------------------------------------------------------------*/
2227 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2228 as an IRCmpF64Result. Generate code to convert it to an
2229 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2230 Assign a new temp to hold that value, and return the temp. */
2231 static
2232 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2234 IRTemp ix = newTemp(Ity_I64);
2235 IRTemp termL = newTemp(Ity_I64);
2236 IRTemp termR = newTemp(Ity_I64);
2237 IRTemp nzcv = newTemp(Ity_I64);
2238 IRTemp irRes = newTemp(Ity_I64);
2240 /* This is where the fun starts. We have to convert 'irRes' from
2241 an IR-convention return result (IRCmpF64Result) to an
2242 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2243 4 bits of 'nzcv'. */
2244 /* Map compare result from IR to ARM(nzcv) */
2246 FP cmp result | IR | ARM(nzcv)
2247 --------------------------------
2248 UN 0x45 0011
2249 LT 0x01 1000
2250 GT 0x00 0010
2251 EQ 0x40 0110
2253 /* Now since you're probably wondering WTF ..
2255 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2256 places them side by side, giving a number which is 0, 1, 2 or 3.
2258 termL is a sequence cooked up by GNU superopt. It converts ix
2259 into an almost correct NZCV value (incredibly), except
2260 for the case of UN, where it produces 0100 instead of the
2261 required 0011.
2263 termR is therefore a correction term, also computed from ix. It
2264 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2265 the final correct value, we subtract termR from termL.
2267 Don't take my word for it. There's a test program at the bottom
2268 of guest_arm_toIR.c, to try this out with.
2270 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2272 assign(
2274 binop(Iop_Or64,
2275 binop(Iop_And64,
2276 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2277 mkU64(3)),
2278 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2280 assign(
2281 termL,
2282 binop(Iop_Add64,
2283 binop(Iop_Shr64,
2284 binop(Iop_Sub64,
2285 binop(Iop_Shl64,
2286 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2287 mkU8(62)),
2288 mkU64(1)),
2289 mkU8(61)),
2290 mkU64(1)));
2292 assign(
2293 termR,
2294 binop(Iop_And64,
2295 binop(Iop_And64,
2296 mkexpr(ix),
2297 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2298 mkU64(1)));
2300 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2301 return nzcv;
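/* Hand check of the above (illustrative; all arithmetic is unsigned
   64-bit, and termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1):
      ix = 0 (GT): termL = 2 (0010), termR = 0  ->  0010
      ix = 1 (LT): termL = 8 (1000), termR = 0  ->  1000
      ix = 2 (EQ): termL = 6 (0110), termR = 0  ->  0110
      ix = 3 (UN): termL = 4 (0100), termR = 1  ->  0011
   which agrees with the table above. */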
2305 /*------------------------------------------------------------*/
2306 /*--- Data processing (immediate) ---*/
2307 /*------------------------------------------------------------*/
2309 /* Helper functions for supporting "DecodeBitMasks" */
2311 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2313 vassert(width > 0 && width <= 64);
2314 vassert(rot >= 0 && rot < width);
2315 if (rot == 0) return x;
2316 ULong res = x >> rot;
2317 res |= (x << (width - rot));
2318 if (width < 64)
2319 res &= ((1ULL << width) - 1);
2320 return res;
2323 static ULong dbm_RepTo64( Int esize, ULong x )
2325 switch (esize) {
2326 case 64:
2327 return x;
2328 case 32:
2329 x &= 0xFFFFFFFF; x |= (x << 32);
2330 return x;
2331 case 16:
2332 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2333 return x;
2334 case 8:
2335 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2336 return x;
2337 case 4:
2338 x &= 0xF; x |= (x << 4); x |= (x << 8);
2339 x |= (x << 16); x |= (x << 32);
2340 return x;
2341 case 2:
2342 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2343 x |= (x << 16); x |= (x << 32);
2344 return x;
2345 default:
2346 break;
2348 vpanic("dbm_RepTo64");
2349 /*NOTREACHED*/
2350 return 0;
2353 static Int dbm_highestSetBit ( ULong x )
2355 Int i;
2356 for (i = 63; i >= 0; i--) {
2357 if (x & (1ULL << i))
2358 return i;
2360 vassert(x == 0);
2361 return -1;
2364 static
2365 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2366 ULong immN, ULong imms, ULong immr, Bool immediate,
2367 UInt M /*32 or 64*/)
2369 vassert(immN < (1ULL << 1));
2370 vassert(imms < (1ULL << 6));
2371 vassert(immr < (1ULL << 6));
2372 vassert(immediate == False || immediate == True);
2373 vassert(M == 32 || M == 64);
2375 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2376 if (len < 1) { /* printf("fail1\n"); */ return False; }
2377 vassert(len <= 6);
2378 vassert(M >= (1 << len));
2380 vassert(len >= 1 && len <= 6);
2381 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2382 (1 << len) - 1;
2383 vassert(levels >= 1 && levels <= 63);
2385 if (immediate && ((imms & levels) == levels)) {
2386 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2387 return False;
2390 ULong S = imms & levels;
2391 ULong R = immr & levels;
2392 Int diff = S - R;
2393 diff &= 63;
2394 Int esize = 1 << len;
2395 vassert(2 <= esize && esize <= 64);
2397 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2398 same below with d. S can be 63 in which case we have an out of
2399 range and hence undefined shift. */
2400 vassert(S >= 0 && S <= 63);
2401 vassert(esize >= (S+1));
2402 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2403 //(1ULL << (S+1)) - 1;
2404 ((1ULL << S) - 1) + (1ULL << S);
2406 Int d = // diff<len-1:0>
2407 diff & ((1 << len)-1);
2408 vassert(esize >= (d+1));
2409 vassert(d >= 0 && d <= 63);
2411 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2412 //(1ULL << (d+1)) - 1;
2413 ((1ULL << d) - 1) + (1ULL << d);
2415 if (esize != 64) vassert(elem_s < (1ULL << esize));
2416 if (esize != 64) vassert(elem_d < (1ULL << esize));
2418 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2419 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2421 return True;
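/* Worked example (illustrative): the 64-bit logical immediate 0xFF is
   encoded as N=1, imms=0b000111, immr=0.  That gives len = 6,
   esize = 64, S = 7, R = 0, elem_s = 0xFF and hence wmask = 0xFF.
   Likewise N=0, imms=0b110011, immr=0 gives len = 3, esize = 8, S = 3,
   elem_s = 0xF, and replication produces 0x0F0F0F0F0F0F0F0F. */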
2425 static
2426 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2427 UInt insn, Bool sigill_diag)
2429 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2431 /* insn[28:23]
2432 10000x PC-rel addressing
2433 10001x Add/subtract (immediate)
2434 100100 Logical (immediate)
2435 100101 Move Wide (immediate)
2436 100110 Bitfield
2437 100111 Extract
2440 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2441 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2442 Bool is64 = INSN(31,31) == 1;
2443 Bool isSub = INSN(30,30) == 1;
2444 Bool setCC = INSN(29,29) == 1;
2445 UInt sh = INSN(23,22);
2446 UInt uimm12 = INSN(21,10);
2447 UInt nn = INSN(9,5);
2448 UInt dd = INSN(4,0);
2449 const HChar* nm = isSub ? "sub" : "add";
2450 if (sh >= 2) {
2451 /* Invalid; fall through */
2452 } else {
2453 vassert(sh <= 1);
2454 uimm12 <<= (12 * sh);
2455 if (is64) {
2456 IRTemp argL = newTemp(Ity_I64);
2457 IRTemp argR = newTemp(Ity_I64);
2458 IRTemp res = newTemp(Ity_I64);
2459 assign(argL, getIReg64orSP(nn));
2460 assign(argR, mkU64(uimm12));
2461 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2462 mkexpr(argL), mkexpr(argR)));
2463 if (setCC) {
2464 putIReg64orZR(dd, mkexpr(res));
2465 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2466 DIP("%ss %s, %s, 0x%x\n",
2467 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2468 } else {
2469 putIReg64orSP(dd, mkexpr(res));
2470 DIP("%s %s, %s, 0x%x\n",
2471 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2473 } else {
2474 IRTemp argL = newTemp(Ity_I32);
2475 IRTemp argR = newTemp(Ity_I32);
2476 IRTemp res = newTemp(Ity_I32);
2477 assign(argL, getIReg32orSP(nn));
2478 assign(argR, mkU32(uimm12));
2479 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2480 mkexpr(argL), mkexpr(argR)));
2481 if (setCC) {
2482 putIReg32orZR(dd, mkexpr(res));
2483 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2484 DIP("%ss %s, %s, 0x%x\n",
2485 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2486 } else {
2487 putIReg32orSP(dd, mkexpr(res));
2488 DIP("%s %s, %s, 0x%x\n",
2489 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2492 return True;
2496 /* -------------------- ADR/ADRP -------------------- */
2497 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2498 UInt bP = INSN(31,31);
2499 UInt immLo = INSN(30,29);
2500 UInt immHi = INSN(23,5);
2501 UInt rD = INSN(4,0);
2502 ULong uimm = (immHi << 2) | immLo;
2503 ULong simm = sx_to_64(uimm, 21);
2504 ULong val;
2505 if (bP) {
2506 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2507 } else {
2508 val = guest_PC_curr_instr + simm;
2510 putIReg64orZR(rD, mkU64(val));
2511 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2512 return True;
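/* Illustrative example: for ADRP with simm = 1 and a (made-up) PC of
   0x400123, the result is (0x400123 & ~0xFFFULL) + (1 << 12)
   = 0x401000, whereas plain ADR would compute 0x400123 + 1. */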
2515 /* -------------------- LOGIC(imm) -------------------- */
2516 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2517 /* 31 30 28 22 21 15 9 4
2518 sf op 100100 N immr imms Rn Rd
2519 op=00: AND Rd|SP, Rn, #imm
2520 op=01: ORR Rd|SP, Rn, #imm
2521 op=10: EOR Rd|SP, Rn, #imm
2522 op=11: ANDS Rd|ZR, Rn, #imm
2524 Bool is64 = INSN(31,31) == 1;
2525 UInt op = INSN(30,29);
2526 UInt N = INSN(22,22);
2527 UInt immR = INSN(21,16);
2528 UInt immS = INSN(15,10);
2529 UInt nn = INSN(9,5);
2530 UInt dd = INSN(4,0);
2531 ULong imm = 0;
2532 Bool ok;
2533 if (N == 1 && !is64)
2534 goto after_logic_imm; /* not allowed; fall through */
2535 ok = dbm_DecodeBitMasks(&imm, NULL,
2536 N, immS, immR, True, is64 ? 64 : 32);
2537 if (!ok)
2538 goto after_logic_imm;
2540 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2541 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2542 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2544 vassert(op < 4);
2545 if (is64) {
2546 IRExpr* argL = getIReg64orZR(nn);
2547 IRExpr* argR = mkU64(imm);
2548 IRTemp res = newTemp(Ity_I64);
2549 assign(res, binop(ops64[op], argL, argR));
2550 if (op < 3) {
2551 putIReg64orSP(dd, mkexpr(res));
2552 DIP("%s %s, %s, 0x%llx\n", names[op],
2553 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2554 } else {
2555 putIReg64orZR(dd, mkexpr(res));
2556 setFlags_LOGIC(True/*is64*/, res);
2557 DIP("%s %s, %s, 0x%llx\n", names[op],
2558 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2560 } else {
2561 IRExpr* argL = getIReg32orZR(nn);
2562 IRExpr* argR = mkU32((UInt)imm);
2563 IRTemp res = newTemp(Ity_I32);
2564 assign(res, binop(ops32[op], argL, argR));
2565 if (op < 3) {
2566 putIReg32orSP(dd, mkexpr(res));
2567 DIP("%s %s, %s, 0x%x\n", names[op],
2568 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2569 } else {
2570 putIReg32orZR(dd, mkexpr(res));
2571 setFlags_LOGIC(False/*!is64*/, res);
2572 DIP("%s %s, %s, 0x%x\n", names[op],
2573 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2576 return True;
2578 after_logic_imm:
2580 /* -------------------- MOV{Z,N,K} -------------------- */
2581 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2582 /* 31 30 28 22 20 4
2583 | | | | | |
2584 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2585 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2586 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2588 Bool is64 = INSN(31,31) == 1;
2589 UInt subopc = INSN(30,29);
2590 UInt hw = INSN(22,21);
2591 UInt imm16 = INSN(20,5);
2592 UInt dd = INSN(4,0);
2593 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2594 /* invalid; fall through */
2595 } else {
2596 ULong imm64 = ((ULong)imm16) << (16 * hw);
2597 if (!is64)
2598 vassert(imm64 < 0x100000000ULL);
2599 switch (subopc) {
2600 case BITS2(1,0): // MOVZ
2601 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2602 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2603 break;
2604 case BITS2(0,0): // MOVN
2605 imm64 = ~imm64;
2606 if (!is64)
2607 imm64 &= 0xFFFFFFFFULL;
2608 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2609 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2610 break;
2611 case BITS2(1,1): // MOVK
2612 /* This is more complex. We are inserting a slice into
2613 the destination register, so we need to have the old
2614 value of it. */
2615 if (is64) {
2616 IRTemp old = newTemp(Ity_I64);
2617 assign(old, getIReg64orZR(dd));
2618 ULong mask = 0xFFFFULL << (16 * hw);
2619 IRExpr* res
2620 = binop(Iop_Or64,
2621 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2622 mkU64(imm64));
2623 putIReg64orZR(dd, res);
2624 DIP("movk %s, 0x%x, lsl %u\n",
2625 nameIReg64orZR(dd), imm16, 16*hw);
2626 } else {
2627 IRTemp old = newTemp(Ity_I32);
2628 assign(old, getIReg32orZR(dd));
2629 vassert(hw <= 1);
2630 UInt mask = ((UInt)0xFFFF) << (16 * hw);
2631 IRExpr* res
2632 = binop(Iop_Or32,
2633 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2634 mkU32((UInt)imm64));
2635 putIReg32orZR(dd, res);
2636 DIP("movk %s, 0x%x, lsl %u\n",
2637 nameIReg32orZR(dd), imm16, 16*hw);
2639 break;
2640 default:
2641 vassert(0);
2643 return True;
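/* Illustrative example for the MOVK case: with the 64-bit destination
   currently holding 0xFFFFFFFFFFFFFFFF, "movk Xd, #0x1234, lsl 16"
   uses mask = 0xFFFF0000, so the result is
   (old & ~mask) | (0x1234 << 16) = 0xFFFFFFFF1234FFFF. */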
2647 /* -------------------- {U,S,}BFM -------------------- */
2648 /* 30 28 22 21 15 9 4
2650 sf 10 100110 N immr imms nn dd
2651 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2652 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2654 sf 00 100110 N immr imms nn dd
2655 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2656 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2658 sf 01 100110 N immr imms nn dd
2659 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2660 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2662 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2663 UInt sf = INSN(31,31);
2664 UInt opc = INSN(30,29);
2665 UInt N = INSN(22,22);
2666 UInt immR = INSN(21,16);
2667 UInt immS = INSN(15,10);
2668 UInt nn = INSN(9,5);
2669 UInt dd = INSN(4,0);
2670 Bool inZero = False;
2671 Bool extend = False;
2672 const HChar* nm = "???";
2673 /* skip invalid combinations */
2674 switch (opc) {
2675 case BITS2(0,0):
2676 inZero = True; extend = True; nm = "sbfm"; break;
2677 case BITS2(0,1):
2678 inZero = False; extend = False; nm = "bfm"; break;
2679 case BITS2(1,0):
2680 inZero = True; extend = False; nm = "ubfm"; break;
2681 case BITS2(1,1):
2682 goto after_bfm; /* invalid */
2683 default:
2684 vassert(0);
2686 if (sf == 1 && N != 1) goto after_bfm;
2687 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2688 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2689 ULong wmask = 0, tmask = 0;
2690 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2691 N, immS, immR, False, sf == 1 ? 64 : 32);
2692 if (!ok) goto after_bfm; /* hmmm */
2694 Bool is64 = sf == 1;
2695 IRType ty = is64 ? Ity_I64 : Ity_I32;
2697 // Handle plain shifts explicitly. These are functionally identical to
2698 // the general case below, but iropt isn't clever enough to reduce those
2699 // sequences to plain shifts. So give it a hand.
2700 if (is64 && immS == 63 && immR >= 1 && immR <= 63) {
2701 if (opc == BITS2(0,0)) {
2702 // 64-bit signed shift right
2703 putIReg64orZR(dd, binop(Iop_Sar64, getIReg64orZR(nn), mkU8(immR)));
2704 DIP("asr %s, %s, #%u\n",
2705 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2706 return True;
2708 if (opc == BITS2(1,0)) {
2709 // 64-bit unsigned shift right
2710 putIReg64orZR(dd, binop(Iop_Shr64, getIReg64orZR(nn), mkU8(immR)));
2711 DIP("lsr %s, %s, #%u\n",
2712 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2713 return True;
2717 if (!is64 && immS == 31 && immR >= 1 && immR <= 31) {
2718 if (opc == BITS2(0,0)) {
2719 // 32-bit signed shift right
2720 putIReg32orZR(dd, binop(Iop_Sar32, getIReg32orZR(nn), mkU8(immR)));
2721 DIP("asr %s, %s, #%u\n",
2722 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2723 return True;
2725 if (opc == BITS2(1,0)) {
2726 // 32-bit unsigned shift right
2727 putIReg32orZR(dd, binop(Iop_Shr32, getIReg32orZR(nn), mkU8(immR)));
2728 DIP("lsr %s, %s, #%u\n",
2729 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2730 return True;
2734 if (is64 && immS >= 0 && immS <= 62
2735 && immR == immS + 1 && opc == BITS2(1,0)) {
2736 // 64-bit shift left
2737 UInt shift = 64 - immR;
2738 vassert(shift >= 1 && shift <= 63);
2739 putIReg64orZR(dd, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(shift)));
2740 DIP("lsl %s, %s, #%u\n",
2741 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), shift);
2742 return True;
2744 if (!is64 && immS >= 0 && immS <= 30
2745 && immR == immS + 1 && opc == BITS2(1,0)) {
2746 // 32-bit shift left
2747 UInt shift = 32 - immR;
2748 vassert(shift >= 1 && shift <= 31);
2749 putIReg32orZR(dd, binop(Iop_Shl32, getIReg32orZR(nn), mkU8(shift)));
2750 DIP("lsl %s, %s, #%u\n",
2751 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), shift);
2752 return True;
2755 // Also special-case SXTW, SXTH and SXTB (the sign-extensions, immR == 0).
2756 if (opc == BITS2(0,0) && immR == 0) {
2757 if (is64) {
2758 // The destination size is 64 bits.
2759 if (immS == 31) {
2760 putIReg64orZR(dd, unop(Iop_32Sto64, getIReg32orZR(nn)));
2761 DIP("sxtw %s, %s\n", nameIReg64orZR(dd), nameIReg32orZR(nn));
2762 return True;
2764 if (immS == 15) {
2765 putIReg64orZR(dd, unop(Iop_16Sto64,
2766 unop(Iop_64to16, getIReg64orZR(nn))));
2767 DIP("sxth %s, %s\n", nameIReg64orZR(dd), nameIReg32orZR(nn));
2768 return True;
2770 if (immS == 7) {
2771 putIReg64orZR(dd, unop(Iop_8Sto64,
2772 unop(Iop_64to8, getIReg64orZR(nn))));
2773 DIP("sxtb %s, %s\n", nameIReg64orZR(dd), nameIReg32orZR(nn));
2774 return True;
2776 } else {
2777 // The destination size is 32 bits.
2778 if (immS == 15) {
2779 putIReg32orZR(dd, unop(Iop_16Sto32,
2780 unop(Iop_64to16, getIReg64orZR(nn))));
2781 DIP("sxth %s, %s\n", nameIReg32orZR(dd), nameIReg32orZR(nn));
2782 return True;
2784 if (immS == 7) {
2785 putIReg32orZR(dd, unop(Iop_8Sto32,
2786 unop(Iop_64to8, getIReg64orZR(nn))));
2787 DIP("sxtb %s, %s\n", nameIReg32orZR(dd), nameIReg32orZR(nn));
2788 return True;
2793 // None of the special cases apply. We have to use the (slow) general
2794 // case.
2795 IRTemp dst = newTemp(ty);
2796 IRTemp src = newTemp(ty);
2797 IRTemp bot = newTemp(ty);
2798 IRTemp top = newTemp(ty);
2799 IRTemp res = newTemp(ty);
2800 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2801 assign(src, getIRegOrZR(is64, nn));
2802 /* perform bitfield move on low bits */
2803 assign(bot, binop(mkOR(ty),
2804 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2805 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2806 mkU(ty, wmask))));
2807 /* determine extension bits (sign, zero or dest register) */
2808 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2809 /* combine extension bits and result bits */
2810 assign(res, binop(mkOR(ty),
2811 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2812 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2813 putIRegOrZR(is64, dd, mkexpr(res));
2814 DIP("%s %s, %s, immR=%u, immS=%u\n",
2815 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2816 return True;
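/* Worked example of the general case (illustrative): UBFM Xd, Xn, #8,
   #15 -- in effect UBFX of bits 15:8 -- decodes to
   wmask = 0xFF000000000000FF and tmask = 0xFF.  With inZero set,
   bot = ROR(src, 8) & wmask and res = bot & tmask, i.e. src<15:8>
   placed at the bottom of Xd with everything above it zeroed. */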
2818 after_bfm:
2820 /* ---------------------- EXTR ---------------------- */
2821 /* 30 28 22 20 15 9 4
2822 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2823 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2825 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2826 Bool is64 = INSN(31,31) == 1;
2827 UInt mm = INSN(20,16);
2828 UInt imm6 = INSN(15,10);
2829 UInt nn = INSN(9,5);
2830 UInt dd = INSN(4,0);
2831 Bool valid = True;
2832 if (INSN(31,31) != INSN(22,22))
2833 valid = False;
2834 if (!is64 && imm6 >= 32)
2835 valid = False;
2836 if (!valid) goto after_extr;
2837 IRType ty = is64 ? Ity_I64 : Ity_I32;
2838 IRTemp srcHi = newTemp(ty);
2839 IRTemp srcLo = newTemp(ty);
2840 IRTemp res = newTemp(ty);
2841 assign(srcHi, getIRegOrZR(is64, nn));
2842 assign(srcLo, getIRegOrZR(is64, mm));
2843 if (imm6 == 0) {
2844 assign(res, mkexpr(srcLo));
2845 } else {
2846 UInt szBits = 8 * sizeofIRType(ty);
2847 vassert(imm6 > 0 && imm6 < szBits);
2848 assign(res, binop(mkOR(ty),
2849 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2850 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2852 putIRegOrZR(is64, dd, mkexpr(res));
2853 DIP("extr %s, %s, %s, #%u\n",
2854 nameIRegOrZR(is64,dd),
2855 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2856 return True;
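/* Illustrative example: "extr x0, x1, x2, #8" yields
   (x1 << 56) | (x2 >>u 8): the bottom 8 bits of x1 concatenated with
   the top 56 bits of x2, i.e. bits 71:8 of the 128-bit value x1:x2. */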
2858 after_extr:
2860 if (sigill_diag) {
2861 vex_printf("ARM64 front end: data_processing_immediate\n");
2863 return False;
2864 # undef INSN
2868 /*------------------------------------------------------------*/
2869 /*--- Data processing (register) instructions ---*/
2870 /*------------------------------------------------------------*/
2872 static const HChar* nameSH ( UInt sh ) {
2873 switch (sh) {
2874 case 0: return "lsl";
2875 case 1: return "lsr";
2876 case 2: return "asr";
2877 case 3: return "ror";
2878 default: vassert(0);
2882 /* Generate IR to get a register value, possibly shifted by an
2883 immediate. Returns either a 32- or 64-bit temporary holding the
2884 result. After the shift, the value can optionally be NOT-ed
2885 too.
2887 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2888 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2889 isn't allowed, but it's the job of the caller to check that.
2891 static IRTemp getShiftedIRegOrZR ( Bool is64,
2892 UInt sh_how, UInt sh_amt, UInt regNo,
2893 Bool invert )
2895 vassert(sh_how < 4);
2896 vassert(sh_amt < (is64 ? 64 : 32));
2897 IRType ty = is64 ? Ity_I64 : Ity_I32;
2898 IRTemp t0 = newTemp(ty);
2899 assign(t0, getIRegOrZR(is64, regNo));
2900 IRTemp t1 = newTemp(ty);
2901 switch (sh_how) {
2902 case BITS2(0,0):
2903 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2904 break;
2905 case BITS2(0,1):
2906 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2907 break;
2908 case BITS2(1,0):
2909 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2910 break;
2911 case BITS2(1,1):
2912 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2913 break;
2914 default:
2915 vassert(0);
2917 if (invert) {
2918 IRTemp t2 = newTemp(ty);
2919 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2920 return t2;
2921 } else {
2922 return t1;
2927 static
2928 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2929 UInt insn, Bool sigill_diag)
2931 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2933 /* ------------------- ADD/SUB(reg) ------------------- */
2934 /* x==0 => 32 bit op x==1 => 64 bit op
2935 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2937 31 30 29 28 23 21 20 15 9 4
2938 | | | | | | | | | |
2939 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2940 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2941 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2942 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2944 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2945 UInt bX = INSN(31,31);
2946 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2947 UInt bS = INSN(29, 29); /* set flags? */
2948 UInt sh = INSN(23,22);
2949 UInt rM = INSN(20,16);
2950 UInt imm6 = INSN(15,10);
2951 UInt rN = INSN(9,5);
2952 UInt rD = INSN(4,0);
2953 Bool isSUB = bOP == 1;
2954 Bool is64 = bX == 1;
2955 IRType ty = is64 ? Ity_I64 : Ity_I32;
2956 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2957 /* invalid; fall through */
2958 } else {
2959 IRTemp argL = newTemp(ty);
2960 assign(argL, getIRegOrZR(is64, rN));
2961 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2962 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2963 IRTemp res = newTemp(ty);
2964 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2965 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2966 if (bS) {
2967 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2969 DIP("%s%s %s, %s, %s, %s #%u\n",
2970 bOP ? "sub" : "add", bS ? "s" : "",
2971 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2972 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2973 return True;
2977 /* ------------------- ADC/SBC(reg) ------------------- */
2978 /* x==0 => 32 bit op x==1 => 64 bit op
2980 31 30 29 28 23 21 20 15 9 4
2981 | | | | | | | | | |
2982 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2983 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2984 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2985 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2988 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2989 UInt bX = INSN(31,31);
2990 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2991 UInt bS = INSN(29,29); /* set flags */
2992 UInt rM = INSN(20,16);
2993 UInt rN = INSN(9,5);
2994 UInt rD = INSN(4,0);
2996 Bool isSUB = bOP == 1;
2997 Bool is64 = bX == 1;
2998 IRType ty = is64 ? Ity_I64 : Ity_I32;
3000 IRTemp oldC = newTemp(ty);
3001 assign(oldC,
3002 is64 ? mk_arm64g_calculate_flag_c()
3003 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
3005 IRTemp argL = newTemp(ty);
3006 assign(argL, getIRegOrZR(is64, rN));
3007 IRTemp argR = newTemp(ty);
3008 assign(argR, getIRegOrZR(is64, rM));
3010 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
3011 IRTemp res = newTemp(ty);
3012 if (isSUB) {
3013 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
3014 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
3015 assign(res,
3016 binop(op,
3017 binop(op, mkexpr(argL), mkexpr(argR)),
3018 binop(xorOp, mkexpr(oldC), one)));
3019 } else {
3020 assign(res,
3021 binop(op,
3022 binop(op, mkexpr(argL), mkexpr(argR)),
3023 mkexpr(oldC)));
3026 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
3028 if (bS) {
3029 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
3032 DIP("%s%s %s, %s, %s\n",
3033 bOP ? "sbc" : "adc", bS ? "s" : "",
3034 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
3035 nameIRegOrZR(is64, rM));
3036 return True;
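/* Illustrative check of the SBC arithmetic above: AArch64 SBC computes
   Rn - Rm - (1 - C).  When the carry (no-borrow) flag is 1, the
   correction term (oldC ^ 1) is 0 and this is a plain subtract; when
   it is 0, one extra unit is subtracted, as required. */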
3039 /* -------------------- LOGIC(reg) -------------------- */
3040 /* x==0 => 32 bit op x==1 => 64 bit op
3041 N==0 => inv? is no-op (no inversion)
3042 N==1 => inv? is NOT
3043 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
3045 31 30 28 23 21 20 15 9 4
3046 | | | | | | | | |
3047 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
3048 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
3049 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
3050 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
3051 With N=1, the names are: BIC ORN EON BICS
3053 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
3054 UInt bX = INSN(31,31);
3055 UInt sh = INSN(23,22);
3056 UInt bN = INSN(21,21);
3057 UInt rM = INSN(20,16);
3058 UInt imm6 = INSN(15,10);
3059 UInt rN = INSN(9,5);
3060 UInt rD = INSN(4,0);
3061 Bool is64 = bX == 1;
3062 IRType ty = is64 ? Ity_I64 : Ity_I32;
3063 if (!is64 && imm6 > 31) {
3064 /* invalid; fall through */
3065 } else {
3066 IRTemp argL = newTemp(ty);
3067 assign(argL, getIRegOrZR(is64, rN));
3068 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
3069 IROp op = Iop_INVALID;
3070 switch (INSN(30,29)) {
3071 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
3072 case BITS2(0,1): op = mkOR(ty); break;
3073 case BITS2(1,0): op = mkXOR(ty); break;
3074 default: vassert(0);
3076 IRTemp res = newTemp(ty);
3077 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
3078 if (INSN(30,29) == BITS2(1,1)) {
3079 setFlags_LOGIC(is64, res);
3081 putIRegOrZR(is64, rD, mkexpr(res));
3083 static const HChar* names_op[8]
3084 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
3085 vassert(((bN << 2) | INSN(30,29)) < 8);
3086 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
3087 /* Special-case the printing of "MOV" */
3088 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
3089 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
3090 nameIRegOrZR(is64, rM));
3091 } else {
3092 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
3093 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
3094 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
3096 return True;
3100 /* -------------------- {U,S}MULH -------------------- */
3101 /* 31 23 22 20 15 9 4
3102 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
3103 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
3105 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
3106 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
3107 Bool isU = INSN(23,23) == 1;
3108 UInt mm = INSN(20,16);
3109 UInt nn = INSN(9,5);
3110 UInt dd = INSN(4,0);
3111 putIReg64orZR(dd, unop(Iop_128HIto64,
3112 binop(isU ? Iop_MullU64 : Iop_MullS64,
3113 getIReg64orZR(nn), getIReg64orZR(mm))));
3114 DIP("%cmulh %s, %s, %s\n",
3115 isU ? 'u' : 's',
3116 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
3117 return True;
3120 /* -------------------- M{ADD,SUB} -------------------- */
3121 /* 31 30 20 15 14 9 4
3122 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
3123 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
3125 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
3126 Bool is64 = INSN(31,31) == 1;
3127 UInt mm = INSN(20,16);
3128 Bool isAdd = INSN(15,15) == 0;
3129 UInt aa = INSN(14,10);
3130 UInt nn = INSN(9,5);
3131 UInt dd = INSN(4,0);
3132 if (is64) {
3133 putIReg64orZR(
3135 binop(isAdd ? Iop_Add64 : Iop_Sub64,
3136 getIReg64orZR(aa),
3137 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3138 } else {
3139 putIReg32orZR(
3141 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3142 getIReg32orZR(aa),
3143 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3145 DIP("%s %s, %s, %s, %s\n",
3146 isAdd ? "madd" : "msub",
3147 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3148 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3149 return True;
3152 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3153 /* 31 30 28 20 15 11 9 4
3154 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3155 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3156 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3157 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3158 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3160 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3161 Bool is64 = INSN(31,31) == 1;
3162 UInt b30 = INSN(30,30);
3163 UInt mm = INSN(20,16);
3164 UInt cond = INSN(15,12);
3165 UInt b10 = INSN(10,10);
3166 UInt nn = INSN(9,5);
3167 UInt dd = INSN(4,0);
3168 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3169 IRType ty = is64 ? Ity_I64 : Ity_I32;
3170 IRExpr* argL = getIRegOrZR(is64, nn);
3171 IRExpr* argR = getIRegOrZR(is64, mm);
3172 switch (op) {
3173 case BITS2(0,0):
3174 break;
3175 case BITS2(0,1):
3176 argR = binop(mkADD(ty), argR, mkU(ty,1));
3177 break;
3178 case BITS2(1,0):
3179 argR = unop(mkNOT(ty), argR);
3180 break;
3181 case BITS2(1,1):
3182 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3183 break;
3184 default:
3185 vassert(0);
3187 putIRegOrZR(
3188 is64, dd,
3189 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3190 argL, argR)
3192 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3193 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3194 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3195 nameIRegOrZR(is64, mm), nameCC(cond));
3196 return True;
3199 /* -------------- ADD/SUB(extended reg) -------------- */
3200 /* 28 20 15 12 9 4
3201 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3202 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3204 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3205 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3207 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3208 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3210 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3211 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3213 The 'm' operand is extended per opt, thusly:
3215 000 Xm & 0xFF UXTB
3216 001 Xm & 0xFFFF UXTH
3217 010 Xm & (2^32)-1 UXTW
3218 011 Xm UXTX
3220 100 Xm sx from bit 7 SXTB
3221 101 Xm sx from bit 15 SXTH
3222 110 Xm sx from bit 31 SXTW
3223 111 Xm SXTX
3225 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3226 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3227 are the identity operation on Wm.
3229 After extension, the value is shifted left by imm3 bits, which
3230 may only be in the range 0 .. 4 inclusive.
3232 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3233 Bool is64 = INSN(31,31) == 1;
3234 Bool isSub = INSN(30,30) == 1;
3235 Bool setCC = INSN(29,29) == 1;
3236 UInt mm = INSN(20,16);
3237 UInt opt = INSN(15,13);
3238 UInt imm3 = INSN(12,10);
3239 UInt nn = INSN(9,5);
3240 UInt dd = INSN(4,0);
3241 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3242 "sxtb", "sxth", "sxtw", "sxtx" };
3243 /* Do almost the same thing in the 32- and 64-bit cases. */
3244 IRTemp xN = newTemp(Ity_I64);
3245 IRTemp xM = newTemp(Ity_I64);
3246 assign(xN, getIReg64orSP(nn));
3247 assign(xM, getIReg64orZR(mm));
3248 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3249 Int shSX = 0;
3250 /* widen Xm .. */
3251 switch (opt) {
3252 case BITS3(0,0,0): // UXTB
3253 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3254 case BITS3(0,0,1): // UXTH
3255 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3256 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3257 if (is64) {
3258 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3260 break;
3261 case BITS3(0,1,1): // UXTX -- always a noop
3262 break;
3263 case BITS3(1,0,0): // SXTB
3264 shSX = 56; goto sxTo64;
3265 case BITS3(1,0,1): // SXTH
3266 shSX = 48; goto sxTo64;
3267 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3268 if (is64) {
3269 shSX = 32; goto sxTo64;
3271 break;
3272 case BITS3(1,1,1): // SXTX -- always a noop
3273 break;
3274 sxTo64:
3275 vassert(shSX >= 32);
3276 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3277 mkU8(shSX));
3278 break;
3279 default:
3280 vassert(0);
3282 /* and now shift */
3283 IRTemp argL = xN;
3284 IRTemp argR = newTemp(Ity_I64);
3285 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3286 IRTemp res = newTemp(Ity_I64);
3287 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3288 mkexpr(argL), mkexpr(argR)));
3289 if (is64) {
3290 if (setCC) {
3291 putIReg64orZR(dd, mkexpr(res));
3292 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3293 } else {
3294 putIReg64orSP(dd, mkexpr(res));
3296 } else {
3297 if (setCC) {
3298 IRTemp argL32 = newTemp(Ity_I32);
3299 IRTemp argR32 = newTemp(Ity_I32);
3300 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3301 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3302 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3303 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3304 } else {
3305 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3308 DIP("%s%s %s, %s, %s %s lsl %u\n",
3309 isSub ? "sub" : "add", setCC ? "s" : "",
3310 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3311 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3312 nameExt[opt], imm3);
3313 return True;
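/* Illustrative example: "add x1, sp, w2, uxtb #2" follows the UXTB row
   of the table above: X1 = SP + ((X2 & 0xFF) << 2).  The extension is
   always applied before the left shift by imm3. */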
3316 /* ---------------- CCMP/CCMN(imm) ---------------- */
3317 /* Bizarrely, these appear in the "data processing register"
3318 category, even though they are operations against an
3319 immediate. */
3320 /* 31 29 20 15 11 9 3
3321 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3322 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3324 Operation is:
3325 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3326 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3328 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3329 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3330 Bool is64 = INSN(31,31) == 1;
3331 Bool isSUB = INSN(30,30) == 1;
3332 UInt imm5 = INSN(20,16);
3333 UInt cond = INSN(15,12);
3334 UInt nn = INSN(9,5);
3335 UInt nzcv = INSN(3,0);
3337 IRTemp condT = newTemp(Ity_I1);
3338 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3340 IRType ty = is64 ? Ity_I64 : Ity_I32;
3341 IRTemp argL = newTemp(ty);
3342 IRTemp argR = newTemp(ty);
3344 if (is64) {
3345 assign(argL, getIReg64orZR(nn));
3346 assign(argR, mkU64(imm5));
3347 } else {
3348 assign(argL, getIReg32orZR(nn));
3349 assign(argR, mkU32(imm5));
3351 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3353 DIP("ccm%c %s, #%u, #%u, %s\n",
3354 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3355 imm5, nzcv, nameCC(cond));
3356 return True;
3359 /* ---------------- CCMP/CCMN(reg) ---------------- */
3360 /* 31 29 20 15 11 9 3
3361 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3362 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3363 Operation is:
3364 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3365 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3367 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3368 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3369 Bool is64 = INSN(31,31) == 1;
3370 Bool isSUB = INSN(30,30) == 1;
3371 UInt mm = INSN(20,16);
3372 UInt cond = INSN(15,12);
3373 UInt nn = INSN(9,5);
3374 UInt nzcv = INSN(3,0);
3376 IRTemp condT = newTemp(Ity_I1);
3377 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3379 IRType ty = is64 ? Ity_I64 : Ity_I32;
3380 IRTemp argL = newTemp(ty);
3381 IRTemp argR = newTemp(ty);
3383 if (is64) {
3384 assign(argL, getIReg64orZR(nn));
3385 assign(argR, getIReg64orZR(mm));
3386 } else {
3387 assign(argL, getIReg32orZR(nn));
3388 assign(argR, getIReg32orZR(mm));
3390 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3392 DIP("ccm%c %s, %s, #%u, %s\n",
3393 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3394 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3395 return True;
3399 /* -------------- REV/REV16/REV32/RBIT -------------- */
3400 /* 31 30 28 20 15 11 9 4
3402 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3403 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3405 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3406 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3408 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3409 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3411 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3413 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3414 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3415 UInt b31 = INSN(31,31);
3416 UInt opc = INSN(11,10);
3418 UInt ix = 0;
3419 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3420 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3421 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3422 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3423 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3424 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3425 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3426 if (ix >= 1 && ix <= 7) {
3427 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3428 UInt nn = INSN(9,5);
3429 UInt dd = INSN(4,0);
3430 IRTemp src = newTemp(Ity_I64);
3431 IRTemp dst = IRTemp_INVALID;
3432 IRTemp (*math)(IRTemp) = NULL;
3433 switch (ix) {
3434 case 1: case 2: math = math_BYTESWAP64; break;
3435 case 3: case 4: math = math_BITSWAP64; break;
3436 case 5: case 6: math = math_USHORTSWAP64; break;
3437 case 7: math = math_UINTSWAP64; break;
3438 default: vassert(0);
3440 const HChar* names[7]
3441 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3442 const HChar* nm = names[ix-1];
3443 vassert(math);
3444 if (ix == 6) {
3445 /* This has to be special cased, since the logic below doesn't
3446 handle it correctly. */
3447 assign(src, getIReg64orZR(nn));
3448 dst = math(src);
3449 putIReg64orZR(dd,
3450 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3451 } else if (is64) {
3452 assign(src, getIReg64orZR(nn));
3453 dst = math(src);
3454 putIReg64orZR(dd, mkexpr(dst));
3455 } else {
3456 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3457 dst = math(src);
3458 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3460 DIP("%s %s, %s\n", nm,
3461 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3462 return True;
3464 /* else fall through */
3467 /* -------------------- CLZ/CLS -------------------- */
3468 /* 30 28 24 20 15 9 4
3469 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3470 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3472 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3473 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3474 Bool is64 = INSN(31,31) == 1;
3475 Bool isCLS = INSN(10,10) == 1;
3476 UInt nn = INSN(9,5);
3477 UInt dd = INSN(4,0);
3478 IRTemp src = newTemp(Ity_I64);
3479 IRTemp srcZ = newTemp(Ity_I64);
3480 IRTemp dst = newTemp(Ity_I64);
3481 /* Get the argument, widened out to 64 bit */
3482 if (is64) {
3483 assign(src, getIReg64orZR(nn));
3484 } else {
3485 assign(src, binop(Iop_Shl64,
3486 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3488 /* If this is CLS, mash the arg around accordingly */
3489 if (isCLS) {
3490 IRExpr* one = mkU8(1);
3491 assign(srcZ,
3492 binop(Iop_Xor64,
3493 binop(Iop_Shl64, mkexpr(src), one),
3494 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3495 } else {
3496 assign(srcZ, mkexpr(src));
3498 /* And compute CLZ. */
3499 if (is64) {
3500 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3501 mkU64(isCLS ? 63 : 64),
3502 unop(Iop_Clz64, mkexpr(srcZ))));
3503 putIReg64orZR(dd, mkexpr(dst));
3504 } else {
3505 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3506 mkU64(isCLS ? 31 : 32),
3507 unop(Iop_Clz64, mkexpr(srcZ))));
3508 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3510 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3511 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3512 return True;
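/* Illustrative check of the CLS mash: bit i of srcZ (for i >= 1) ends
   up as src[i] ^ src[i-1], so counting leading zeroes of srcZ counts
   how many top bits agree with their neighbour.  E.g. for
   src = 0xFFFF000000000000 the first mismatch is at bit 48, so
   Clz64(srcZ) = 15, which is indeed CLS of that value. */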
3515 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3516 /* 30 28 20 15 11 9 4
3517 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3518 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3519 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3520 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3522 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3523 && INSN(15,12) == BITS4(0,0,1,0)) {
3524 Bool is64 = INSN(31,31) == 1;
3525 UInt mm = INSN(20,16);
3526 UInt op = INSN(11,10);
3527 UInt nn = INSN(9,5);
3528 UInt dd = INSN(4,0);
3529 IRType ty = is64 ? Ity_I64 : Ity_I32;
3530 IRTemp srcL = newTemp(ty);
3531 IRTemp srcR = newTemp(Ity_I64);
3532 IRTemp res = newTemp(ty);
3533 IROp iop = Iop_INVALID;
3534 assign(srcL, getIRegOrZR(is64, nn));
3535 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3536 mkU64(is64 ? 63 : 31)));
3537 if (op < 3) {
3538 // LSLV, LSRV, ASRV
3539 switch (op) {
3540 case BITS2(0,0): iop = mkSHL(ty); break;
3541 case BITS2(0,1): iop = mkSHR(ty); break;
3542 case BITS2(1,0): iop = mkSAR(ty); break;
3543 default: vassert(0);
3545 assign(res, binop(iop, mkexpr(srcL),
3546 unop(Iop_64to8, mkexpr(srcR))));
3547 } else {
3548 // RORV
3549 IROp opSHL = mkSHL(ty);
3550 IROp opSHR = mkSHR(ty);
3551 IROp opOR = mkOR(ty);
3552 IRExpr* width = mkU64(is64 ? 64: 32);
3553 assign(
3554 res,
3555 IRExpr_ITE(
3556 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3557 mkexpr(srcL),
3558 binop(opOR,
3559 binop(opSHL,
3560 mkexpr(srcL),
3561 unop(Iop_64to8, binop(Iop_Sub64, width,
3562 mkexpr(srcR)))),
3563 binop(opSHR,
3564 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3567 putIRegOrZR(is64, dd, mkexpr(res));
3568 vassert(op < 4);
3569 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3570 DIP("%s %s, %s, %s\n",
3571 names[op], nameIRegOrZR(is64,dd),
3572 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3573 return True;
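/* Note on the RORV case above (rationale, illustrative): the ITE guards
   the rotate-by-zero case because the general expression would otherwise
   shift srcL left by (width - 0), an out-of-range shift amount which the
   IR leaves unspecified; for a zero rotate the value must simply pass
   through unchanged. */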
3576 /* -------------------- SDIV/UDIV -------------------- */
3577 /* 30 28 20 15 10 9 4
3578 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3579 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3581 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3582 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3583 Bool is64 = INSN(31,31) == 1;
3584 UInt mm = INSN(20,16);
3585 Bool isS = INSN(10,10) == 1;
3586 UInt nn = INSN(9,5);
3587 UInt dd = INSN(4,0);
3588 if (isS) {
3589 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3590 getIRegOrZR(is64, nn),
3591 getIRegOrZR(is64, mm)));
3592 } else {
3593 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3594 getIRegOrZR(is64, nn),
3595 getIRegOrZR(is64, mm)));
3597 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3598 nameIRegOrZR(is64, dd),
3599 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3600 return True;
3603 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3604 /* 31 23 20 15 14 9 4
3605 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3606 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3607 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3608 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3609 with operation
3610 Xd = Xa +/- (Wn *u/s Wm)
3612 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3613 Bool isU = INSN(23,23) == 1;
3614 UInt mm = INSN(20,16);
3615 Bool isAdd = INSN(15,15) == 0;
3616 UInt aa = INSN(14,10);
3617 UInt nn = INSN(9,5);
3618 UInt dd = INSN(4,0);
3619 IRTemp wN = newTemp(Ity_I32);
3620 IRTemp wM = newTemp(Ity_I32);
3621 IRTemp xA = newTemp(Ity_I64);
3622 IRTemp muld = newTemp(Ity_I64);
3623 IRTemp res = newTemp(Ity_I64);
3624 assign(wN, getIReg32orZR(nn));
3625 assign(wM, getIReg32orZR(mm));
3626 assign(xA, getIReg64orZR(aa));
3627 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3628 mkexpr(wN), mkexpr(wM)));
3629 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3630 mkexpr(xA), mkexpr(muld)));
3631 putIReg64orZR(dd, mkexpr(res));
3632 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3633 nameIReg64orZR(dd), nameIReg32orZR(nn),
3634 nameIReg32orZR(mm), nameIReg64orZR(aa));
3635 return True;
3638 /* -------------------- CRC32/CRC32C -------------------- */
3639 /* 31 30 20 15 11 9 4
3640 sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
3641 sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
3643 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3644 && INSN(15,13) == BITS3(0,1,0)) {
3645 UInt bitSF = INSN(31,31);
3646 UInt mm = INSN(20,16);
3647 UInt bitC = INSN(12,12);
3648 UInt sz = INSN(11,10);
3649 UInt nn = INSN(9,5);
3650 UInt dd = INSN(4,0);
3651 vassert(sz >= 0 && sz <= 3);
3652 if ((bitSF == 0 && sz <= BITS2(1,0))
3653 || (bitSF == 1 && sz == BITS2(1,1))) {
3654 UInt ix = (bitC == 1 ? 4 : 0) | sz;
3655 void* helpers[8]
3656 = { &arm64g_calc_crc32b, &arm64g_calc_crc32h,
3657 &arm64g_calc_crc32w, &arm64g_calc_crc32x,
3658 &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
3659 &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
3660 const HChar* hNames[8]
3661 = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
3662 "arm64g_calc_crc32w", "arm64g_calc_crc32x",
3663 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3664 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3665 const HChar* iNames[8]
3666 = { "crc32b", "crc32h", "crc32w", "crc32x",
3667 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3669 IRTemp srcN = newTemp(Ity_I64);
3670 assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
3672 IRTemp srcM = newTemp(Ity_I64);
3673 IRExpr* at64 = getIReg64orZR(mm);
3674 switch (sz) {
3675 case BITS2(0,0):
3676 assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
3677 case BITS2(0,1):
3678 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
3679 case BITS2(1,0):
3680 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
3681 case BITS2(1,1):
3682 assign(srcM, at64); break;
3683 default:
3684 vassert(0);
3687 vassert(ix >= 0 && ix <= 7);
3689 putIReg64orZR(
3690                dd,
3691 unop(Iop_32Uto64,
3692 unop(Iop_64to32,
3693 mkIRExprCCall(Ity_I64, 0/*regparm*/,
3694 hNames[ix], helpers[ix],
3695 mkIRExprVec_2(mkexpr(srcN),
3696 mkexpr(srcM))))));
3698 DIP("%s %s, %s, %s\n", iNames[ix],
3699 nameIReg32orZR(dd),
3700 nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
3701 return True;
3703 /* fall through */
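      /* Illustrative only: a bit-at-a-time sketch of the accumulation the
         arm64g_calc_crc32* helpers perform, one byte per call.  This helper
         is hypothetical and not part of the decoder; the polynomials shown
         are the standard reflected forms used by these instructions
         (0xEDB88320 for CRC32, 0x82F63B78 for CRC32C).

            static UInt ref_crc32_update_byte ( UInt acc, UChar b, Bool isC )
            {
               UInt poly = isC ? 0x82F63B78u : 0xEDB88320u;
               UInt i;
               acc ^= b;
               for (i = 0; i < 8; i++)
                  acc = (acc >> 1) ^ ((acc & 1) ? poly : 0);
               return acc;
            }

         The wider forms simply feed the bytes of the masked Wm/Xm value
         through this update, least significant byte first, which is why
         srcM above is masked down to the transfer width. */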
3706 if (sigill_diag) {
3707 vex_printf("ARM64 front end: data_processing_register\n");
3709 return False;
3710 # undef INSN
3714 /*------------------------------------------------------------*/
3715 /*--- Math helpers for vector interleave/deinterleave ---*/
3716 /*------------------------------------------------------------*/
3718 #define EX(_tmp) \
3719 mkexpr(_tmp)
3720 #define SL(_hi128,_lo128,_nbytes) \
3721 ( (_nbytes) == 0 \
3722 ? (_lo128) \
3723 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3724 #define ROR(_v128,_nbytes) \
3725 SL((_v128),(_v128),(_nbytes))
3726 #define ROL(_v128,_nbytes) \
3727 SL((_v128),(_v128),16-(_nbytes))
3728 #define SHR(_v128,_nbytes) \
3729 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3730 #define SHL(_v128,_nbytes) \
3731 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3732 #define ILO64x2(_argL,_argR) \
3733 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3734 #define IHI64x2(_argL,_argR) \
3735 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3736 #define ILO32x4(_argL,_argR) \
3737 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3738 #define IHI32x4(_argL,_argR) \
3739 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3740 #define ILO16x8(_argL,_argR) \
3741 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3742 #define IHI16x8(_argL,_argR) \
3743 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3744 #define ILO8x16(_argL,_argR) \
3745 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3746 #define IHI8x16(_argL,_argR) \
3747 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3748 #define CEV32x4(_argL,_argR) \
3749 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3750 #define COD32x4(_argL,_argR) \
3751 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3752 #define COD16x8(_argL,_argR) \
3753 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3754 #define COD8x16(_argL,_argR) \
3755 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3756 #define CEV8x16(_argL,_argR) \
3757 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3758 #define AND(_arg1,_arg2) \
3759 binop(Iop_AndV128,(_arg1),(_arg2))
3760 #define OR2(_arg1,_arg2) \
3761 binop(Iop_OrV128,(_arg1),(_arg2))
3762 #define OR3(_arg1,_arg2,_arg3) \
3763 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3764 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3765 binop(Iop_OrV128, \
3766 binop(Iop_OrV128,(_arg1),(_arg2)), \
3767 binop(Iop_OrV128,(_arg3),(_arg4)))
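/* Illustrative only: the macros above lean heavily on Iop_SliceV128, so here
   is a scalar model of it on 16-byte little-endian arrays, purely as a
   reading aid (hypothetical helper, not used by the decoder).  Assuming
   SL(hi,lo,n) selects bytes n .. n+15 of the 32-byte concatenation hi:lo
   -- which is how the macros above use it -- SL(v,v,n), i.e. ROR, rotates v
   right by n bytes, and ROL(v,n) rotates left by n bytes.

      static void ref_SliceV128 ( UChar* res, const UChar* hi128,
                                  const UChar* lo128, UInt nbytes )
      {
         UInt i;
         for (i = 0; i < 16; i++) {
            UInt j = i + nbytes;            // 0 .. 30
            res[i] = j < 16 ? lo128[j] : hi128[j - 16];
         }
      }
*/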
3770 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3771 static
3772 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3773 UInt laneSzBlg2, IRTemp u0 )
3775 assign(*i0, mkexpr(u0));
3779 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3780 static
3781 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3782 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3784 /* This is pretty easy, since we have primitives directly to
3785 hand. */
3786 if (laneSzBlg2 == 3) {
3787 // 64x2
3788 // u1 == B1 B0, u0 == A1 A0
3789 // i1 == B1 A1, i0 == B0 A0
3790 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3791 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3792 return;
3794 if (laneSzBlg2 == 2) {
3795 // 32x4
3796 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3797 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3798 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3799 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3800 return;
3802 if (laneSzBlg2 == 1) {
3803 // 16x8
3804 // u1 == B{7..0}, u0 == A{7..0}
3805 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3806 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3807 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3808 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3809 return;
3811 if (laneSzBlg2 == 0) {
3812 // 8x16
3813 // u1 == B{f..0}, u0 == A{f..0}
3814 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3815 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3816 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3817 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3818 return;
3820 /*NOTREACHED*/
3821 vassert(0);
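/* Illustrative only: a scalar reference for what the ST1..ST4 interleavers
   below compute, expressed on byte arrays.  Lane j of register r (lanes of
   laneSzB bytes each) ends up at element j*nRegs + r of the flattened
   output, whose low 16 bytes are i0, the next 16 bytes i1, and so on.  The
   helper is hypothetical and only meant for cross-checking the vector
   versions.

      static void ref_interleave ( UChar* dst,        // nRegs*16 bytes
                                   const UChar* src,  // nRegs*16 bytes, reg-major
                                   UInt nRegs, UInt laneSzB )
      {
         UInt lanesPerReg = 16 / laneSzB;
         UInt r, j, b;
         for (r = 0; r < nRegs; r++)
            for (j = 0; j < lanesPerReg; j++)
               for (b = 0; b < laneSzB; b++)
                  dst[(j * nRegs + r) * laneSzB + b]
                     = src[(r * lanesPerReg + j) * laneSzB + b];
      }

   With nRegs == 2 and laneSzB == 8 this reproduces the 64x2 case above:
   dst holds A0 B0 A1 B1 from low to high, i.e. i0 == B0:A0, i1 == B1:A1. */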
3825 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3826 static
3827 void math_INTERLEAVE3_128(
3828 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3829 UInt laneSzBlg2,
3830 IRTemp u0, IRTemp u1, IRTemp u2 )
3832 if (laneSzBlg2 == 3) {
3833 // 64x2
3834 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3835 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3836 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3837 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3838 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3839 return;
3842 if (laneSzBlg2 == 2) {
3843 // 32x4
3844 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3845 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3844       // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3847 IRTemp p0 = newTempV128();
3848 IRTemp p1 = newTempV128();
3849 IRTemp p2 = newTempV128();
3850 IRTemp c1100 = newTempV128();
3851 IRTemp c0011 = newTempV128();
3852 IRTemp c0110 = newTempV128();
3853 assign(c1100, mkV128(0xFF00));
3854 assign(c0011, mkV128(0x00FF));
3855 assign(c0110, mkV128(0x0FF0));
3856 // First interleave them at 64x2 granularity,
3857 // generating partial ("p") values.
3858 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3859 // And more shuffling around for the final answer
3860 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3861 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3862 assign(*i1, OR3( SHL(EX(p2),12),
3863 AND(EX(p1),EX(c0110)),
3864 SHR(EX(p0),12) ));
3865 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3866 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3867 return;
3870 if (laneSzBlg2 == 1) {
3871 // 16x8
3872 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3873 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3874 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3876 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3877 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3878 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3880 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3881       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3882 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3883 IRTemp p0 = newTempV128();
3884 IRTemp p1 = newTempV128();
3885 IRTemp p2 = newTempV128();
3886 IRTemp c1000 = newTempV128();
3887 IRTemp c0100 = newTempV128();
3888 IRTemp c0010 = newTempV128();
3889 IRTemp c0001 = newTempV128();
3890 assign(c1000, mkV128(0xF000));
3891 assign(c0100, mkV128(0x0F00));
3892 assign(c0010, mkV128(0x00F0));
3893 assign(c0001, mkV128(0x000F));
3894 // First interleave them at 32x4 granularity,
3895 // generating partial ("p") values.
3896 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3897 // And more shuffling around for the final answer
3898 assign(*i2,
3899 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3900 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3901 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3902 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3903                  ));
3904 assign(*i1,
3905 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3906 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3907 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3908 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3909                  ));
3910 assign(*i0,
3911 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3912 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3913 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3914 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3915                  ));
3916 return;
3919 if (laneSzBlg2 == 0) {
3920 // 8x16. It doesn't seem worth the hassle of first doing a
3921 // 16x8 interleave, so just generate all 24 partial results
3922 // directly :-(
3923 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3924 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3925 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3926 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3928 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3929 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3930 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3931 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3932 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3933 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3934 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3935 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3936 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3938       // eg XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
3939 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3941 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3942 IRTemp t_##_tempName = newTempV128(); \
3943 assign(t_##_tempName, \
3944 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3945 ROR(EX(_srcVec2),(_srcShift2)) ) )
3947 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3948 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3950 // The slicing and reassembly are done as interleavedly as possible,
3951 // so as to minimise the demand for registers in the back end, which
3952 // was observed to be a problem in testing.
3954 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3955 XXXX(AfCe, AA, 0xf, CC, 0xe);
3956 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3958 XXXX(BeAe, BB, 0xe, AA, 0xe);
3959 XXXX(CdBd, CC, 0xd, BB, 0xd);
3960 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3961 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3963 XXXX(AdCc, AA, 0xd, CC, 0xc);
3964 XXXX(BcAc, BB, 0xc, AA, 0xc);
3965 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3967 XXXX(CbBb, CC, 0xb, BB, 0xb);
3968 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3969 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3970 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3971 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3973 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3974 XXXX(C9B9, CC, 0x9, BB, 0x9);
3975 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3977 XXXX(A9C8, AA, 0x9, CC, 0x8);
3978 XXXX(B8A8, BB, 0x8, AA, 0x8);
3979 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3980 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3982 XXXX(C7B7, CC, 0x7, BB, 0x7);
3983 XXXX(A7C6, AA, 0x7, CC, 0x6);
3984 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3986 XXXX(B6A6, BB, 0x6, AA, 0x6);
3987 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3988 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3989 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3990 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3992 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3993 XXXX(B4A4, BB, 0x4, AA, 0x4);
3994 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3996 XXXX(C3B3, CC, 0x3, BB, 0x3);
3997 XXXX(A3C2, AA, 0x3, CC, 0x2);
3998 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3999 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
4001 XXXX(B2A2, BB, 0x2, AA, 0x2);
4002 XXXX(C1B1, CC, 0x1, BB, 0x1);
4003 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
4005 XXXX(A1C0, AA, 0x1, CC, 0x0);
4006 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
4007 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
4008 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
4009 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
4011 # undef XXXX
4012 return;
4015 /*NOTREACHED*/
4016 vassert(0);
4020 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
4021 static
4022 void math_INTERLEAVE4_128(
4023 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4024 UInt laneSzBlg2,
4025 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4027 if (laneSzBlg2 == 3) {
4028 // 64x2
4029 assign(*i0, ILO64x2(EX(u1), EX(u0)));
4030 assign(*i1, ILO64x2(EX(u3), EX(u2)));
4031 assign(*i2, IHI64x2(EX(u1), EX(u0)));
4032 assign(*i3, IHI64x2(EX(u3), EX(u2)));
4033 return;
4035 if (laneSzBlg2 == 2) {
4036 // 32x4
4037 // First, interleave at the 64-bit lane size.
4038 IRTemp p0 = newTempV128();
4039 IRTemp p1 = newTempV128();
4040 IRTemp p2 = newTempV128();
4041 IRTemp p3 = newTempV128();
4042 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
4043 // And interleave (cat) at the 32 bit size.
4044 assign(*i0, CEV32x4(EX(p1), EX(p0)));
4045 assign(*i1, COD32x4(EX(p1), EX(p0)));
4046 assign(*i2, CEV32x4(EX(p3), EX(p2)));
4047 assign(*i3, COD32x4(EX(p3), EX(p2)));
4048 return;
4050 if (laneSzBlg2 == 1) {
4051 // 16x8
4052 // First, interleave at the 32-bit lane size.
4053 IRTemp p0 = newTempV128();
4054 IRTemp p1 = newTempV128();
4055 IRTemp p2 = newTempV128();
4056 IRTemp p3 = newTempV128();
4057 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
4058 // And rearrange within each vector, to get the right 16 bit lanes.
4059 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
4060 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
4061 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
4062 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
4063 return;
4065 if (laneSzBlg2 == 0) {
4066 // 8x16
4067 // First, interleave at the 16-bit lane size.
4068 IRTemp p0 = newTempV128();
4069 IRTemp p1 = newTempV128();
4070 IRTemp p2 = newTempV128();
4071 IRTemp p3 = newTempV128();
4072 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
4073 // And rearrange within each vector, to get the right 8 bit lanes.
4074 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
4075 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
4076 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
4077 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
4078 return;
4080 /*NOTREACHED*/
4081 vassert(0);
4085 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
4086 static
4087 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
4088 UInt laneSzBlg2, IRTemp i0 )
4090 assign(*u0, mkexpr(i0));
4094 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
4095 static
4096 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4097 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4099 /* This is pretty easy, since we have primitives directly to
4100 hand. */
4101 if (laneSzBlg2 == 3) {
4102 // 64x2
4103 // i1 == B1 A1, i0 == B0 A0
4104 // u1 == B1 B0, u0 == A1 A0
4105 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
4106 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
4107 return;
4109 if (laneSzBlg2 == 2) {
4110 // 32x4
4111 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
4112 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
4113 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
4114 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
4115 return;
4117 if (laneSzBlg2 == 1) {
4118 // 16x8
4119 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
4120 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
4121 // u1 == B{7..0}, u0 == A{7..0}
4122 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
4123 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
4124 return;
4126 if (laneSzBlg2 == 0) {
4127 // 8x16
4128 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
4129 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
4130 // u1 == B{f..0}, u0 == A{f..0}
4131 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
4132 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
4133 return;
4135 /*NOTREACHED*/
4136 vassert(0);
4140 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
4141 static
4142 void math_DEINTERLEAVE3_128(
4143 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4144 UInt laneSzBlg2,
4145 IRTemp i0, IRTemp i1, IRTemp i2 )
4147 if (laneSzBlg2 == 3) {
4148 // 64x2
4149 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
4150 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
4151 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
4152 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
4153 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
4154 return;
4157 if (laneSzBlg2 == 2) {
4158 // 32x4
4159       // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
4160 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
4161 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
4162 IRTemp t_a1c0b0a0 = newTempV128();
4163 IRTemp t_a2c1b1a1 = newTempV128();
4164 IRTemp t_a3c2b2a2 = newTempV128();
4165 IRTemp t_a0c3b3a3 = newTempV128();
4166 IRTemp p0 = newTempV128();
4167 IRTemp p1 = newTempV128();
4168 IRTemp p2 = newTempV128();
4169 // Compute some intermediate values.
4170 assign(t_a1c0b0a0, EX(i0));
4171 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
4172 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
4173 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
4174 // First deinterleave into lane-pairs
4175 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
4176 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
4177 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
4178 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
4179 // Then deinterleave at 64x2 granularity.
4180 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
4181 return;
4184 if (laneSzBlg2 == 1) {
4185 // 16x8
4186 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
4187 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
4188 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
4190 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
4191       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
4192 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
4194 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
4195 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
4196 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
4198 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
4199 s0 = s1 = s2 = s3
4200 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
4201 newTempsV128_4(&s0, &s1, &s2, &s3);
4202 newTempsV128_4(&t0, &t1, &t2, &t3);
4203 newTempsV128_4(&p0, &p1, &p2, &c00111111);
4205 // s0 == b2a2 c1b1a1 c0b0a0
4206       // s1 == b4a4 c3b3a3 c2b2a2
4207 // s2 == b6a6 c5b5a5 c4b4a4
4208 // s3 == b0a0 c7b7a7 c6b6a6
4209 assign(s0, EX(i0));
4210 assign(s1, SL(EX(i1),EX(i0),6*2));
4211 assign(s2, SL(EX(i2),EX(i1),4*2));
4212 assign(s3, SL(EX(i0),EX(i2),2*2));
4214 // t0 == 0 0 c1c0 b1b0 a1a0
4215 // t1 == 0 0 c3c2 b3b2 a3a2
4216 // t2 == 0 0 c5c4 b5b4 a5a4
4217 // t3 == 0 0 c7c6 b7b6 a7a6
4218 assign(c00111111, mkV128(0x0FFF));
4219 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4220 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4221 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4222 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4224 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4225 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4226 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4228 // Then deinterleave at 32x4 granularity.
4229 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4230 return;
4233 if (laneSzBlg2 == 0) {
4234 // 8x16. This is the same scheme as for 16x8, with twice the
4235 // number of intermediate values.
4237 // u2 == C{f..0}
4238 // u1 == B{f..0}
4239 // u0 == A{f..0}
4241 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4242 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4243 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4245 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4246 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4247 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4249 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4250 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4251 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4252 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4253 = IRTemp_INVALID;
4254 newTempsV128_4(&s0, &s1, &s2, &s3);
4255 newTempsV128_4(&s4, &s5, &s6, &s7);
4256 newTempsV128_4(&t0, &t1, &t2, &t3);
4257 newTempsV128_4(&t4, &t5, &t6, &t7);
4258 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4260 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4261 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4262 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4263 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4264 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4265 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4266 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4267 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4268 assign(s0, SL(EX(i1),EX(i0), 0));
4269 assign(s1, SL(EX(i1),EX(i0), 6));
4270 assign(s2, SL(EX(i1),EX(i0),12));
4271 assign(s3, SL(EX(i2),EX(i1), 2));
4272 assign(s4, SL(EX(i2),EX(i1), 8));
4273 assign(s5, SL(EX(i2),EX(i1),14));
4274 assign(s6, SL(EX(i0),EX(i2), 4));
4275 assign(s7, SL(EX(i0),EX(i2),10));
4277 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4278 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4279 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4280 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4281 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4282 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4283 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4284 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4285 assign(cMASK, mkV128(0x003F));
4286 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4287 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4288 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4289 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4290 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4291 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4292 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4293 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4295 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4296 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4297 SHL(EX(t3),2), SHR(EX(t2),4) ));
4298 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4300 // Then deinterleave at 16x8 granularity.
4301 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4302 return;
4305 /*NOTREACHED*/
4306 vassert(0);
4310 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4311 static
4312 void math_DEINTERLEAVE4_128(
4313 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4314 UInt laneSzBlg2,
4315 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4317 if (laneSzBlg2 == 3) {
4318 // 64x2
4319 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4320 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4321 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4322 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4323 return;
4325 if (laneSzBlg2 == 2) {
4326 // 32x4
4327 IRTemp p0 = newTempV128();
4328 IRTemp p2 = newTempV128();
4329 IRTemp p1 = newTempV128();
4330 IRTemp p3 = newTempV128();
4331 assign(p0, ILO32x4(EX(i1), EX(i0)));
4332 assign(p1, IHI32x4(EX(i1), EX(i0)));
4333 assign(p2, ILO32x4(EX(i3), EX(i2)));
4334 assign(p3, IHI32x4(EX(i3), EX(i2)));
4335 // And now do what we did for the 64-bit case.
4336 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4337 return;
4339 if (laneSzBlg2 == 1) {
4340 // 16x8
4341 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4342 IRTemp p0 = newTempV128();
4343 IRTemp p1 = newTempV128();
4344 IRTemp p2 = newTempV128();
4345 IRTemp p3 = newTempV128();
4346 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4347 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4348 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4349 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4350 // From here on is like the 32 bit case.
4351 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4352 return;
4354 if (laneSzBlg2 == 0) {
4355 // 8x16
4356 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4357 IRTemp p0 = newTempV128();
4358 IRTemp p1 = newTempV128();
4359 IRTemp p2 = newTempV128();
4360 IRTemp p3 = newTempV128();
4361 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4362 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4363 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4364 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4365 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4366 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4367 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4368 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4369 // From here on is like the 16 bit case.
4370 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4371 return;
4373 /*NOTREACHED*/
4374 vassert(0);
4378 /* Wrappers that use the full-width (de)interleavers to do half-width
4379 (de)interleaving. The scheme is to clone each input lane in the
4380 lower half of each incoming value, do a full width (de)interleave
4381    at the next lane size up, and remove every other lane of the
4382 result. The returned values may have any old junk in the upper
4383 64 bits -- the caller must ignore that. */
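/* Illustrative only: a scalar sketch of the "clone, widen, drop odd lanes"
   trick described above, for a single 64-bit input.  The doubler duplicates
   each lane of the low 64 bits into an adjacent pair, so the value can be
   fed to the 128-bit routines at the next lane size up; the halver then
   keeps only the even lanes of the result.  Hypothetical helper, shown only
   to make the scheme concrete.

      static void ref_double_lanes ( UChar* dst,        // 16 bytes
                                     const UChar* src,  // low 8 bytes used
                                     UInt laneSzB )
      {
         UInt nLanes = 8 / laneSzB;
         UInt j, b;
         for (j = 0; j < nLanes; j++)
            for (b = 0; b < laneSzB; b++) {
               dst[(2*j + 0) * laneSzB + b] = src[j * laneSzB + b];
               dst[(2*j + 1) * laneSzB + b] = src[j * laneSzB + b];
            }
      }
*/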
4385 /* Helper function -- get doubling and narrowing operations. */
4386 static
4387 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4388 /*OUT*/IROp* halver,
4389 UInt laneSzBlg2 )
4391 switch (laneSzBlg2) {
4392 case 2:
4393 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4394 break;
4395 case 1:
4396 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4397 break;
4398 case 0:
4399 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4400 break;
4401 default:
4402 vassert(0);
4406 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4407 static
4408 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4409 UInt laneSzBlg2, IRTemp u0 )
4411 assign(*i0, mkexpr(u0));
4415 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4416 static
4417 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4418 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4420 if (laneSzBlg2 == 3) {
4421 // 1x64, degenerate case
4422 assign(*i0, EX(u0));
4423 assign(*i1, EX(u1));
4424 return;
4427 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4428 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4429 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4431 IRTemp du0 = newTempV128();
4432 IRTemp du1 = newTempV128();
4433 assign(du0, binop(doubler, EX(u0), EX(u0)));
4434 assign(du1, binop(doubler, EX(u1), EX(u1)));
4435 IRTemp di0 = newTempV128();
4436 IRTemp di1 = newTempV128();
4437 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4438 assign(*i0, binop(halver, EX(di0), EX(di0)));
4439 assign(*i1, binop(halver, EX(di1), EX(di1)));
4443 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4444 static
4445 void math_INTERLEAVE3_64(
4446 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4447 UInt laneSzBlg2,
4448 IRTemp u0, IRTemp u1, IRTemp u2 )
4450 if (laneSzBlg2 == 3) {
4451 // 1x64, degenerate case
4452 assign(*i0, EX(u0));
4453 assign(*i1, EX(u1));
4454 assign(*i2, EX(u2));
4455 return;
4458 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4459 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4460 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4462 IRTemp du0 = newTempV128();
4463 IRTemp du1 = newTempV128();
4464 IRTemp du2 = newTempV128();
4465 assign(du0, binop(doubler, EX(u0), EX(u0)));
4466 assign(du1, binop(doubler, EX(u1), EX(u1)));
4467 assign(du2, binop(doubler, EX(u2), EX(u2)));
4468 IRTemp di0 = newTempV128();
4469 IRTemp di1 = newTempV128();
4470 IRTemp di2 = newTempV128();
4471 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4472 assign(*i0, binop(halver, EX(di0), EX(di0)));
4473 assign(*i1, binop(halver, EX(di1), EX(di1)));
4474 assign(*i2, binop(halver, EX(di2), EX(di2)));
4478 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4479 static
4480 void math_INTERLEAVE4_64(
4481 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4482 UInt laneSzBlg2,
4483 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4485 if (laneSzBlg2 == 3) {
4486 // 1x64, degenerate case
4487 assign(*i0, EX(u0));
4488 assign(*i1, EX(u1));
4489 assign(*i2, EX(u2));
4490 assign(*i3, EX(u3));
4491 return;
4494 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4495 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4496 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4498 IRTemp du0 = newTempV128();
4499 IRTemp du1 = newTempV128();
4500 IRTemp du2 = newTempV128();
4501 IRTemp du3 = newTempV128();
4502 assign(du0, binop(doubler, EX(u0), EX(u0)));
4503 assign(du1, binop(doubler, EX(u1), EX(u1)));
4504 assign(du2, binop(doubler, EX(u2), EX(u2)));
4505 assign(du3, binop(doubler, EX(u3), EX(u3)));
4506 IRTemp di0 = newTempV128();
4507 IRTemp di1 = newTempV128();
4508 IRTemp di2 = newTempV128();
4509 IRTemp di3 = newTempV128();
4510 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4511 laneSzBlg2 + 1, du0, du1, du2, du3);
4512 assign(*i0, binop(halver, EX(di0), EX(di0)));
4513 assign(*i1, binop(halver, EX(di1), EX(di1)));
4514 assign(*i2, binop(halver, EX(di2), EX(di2)));
4515 assign(*i3, binop(halver, EX(di3), EX(di3)));
4519 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4520 static
4521 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4522 UInt laneSzBlg2, IRTemp i0 )
4524 assign(*u0, mkexpr(i0));
4528 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4529 static
4530 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4531 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4533 if (laneSzBlg2 == 3) {
4534 // 1x64, degenerate case
4535 assign(*u0, EX(i0));
4536 assign(*u1, EX(i1));
4537 return;
4540 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4541 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4542 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4544 IRTemp di0 = newTempV128();
4545 IRTemp di1 = newTempV128();
4546 assign(di0, binop(doubler, EX(i0), EX(i0)));
4547 assign(di1, binop(doubler, EX(i1), EX(i1)));
4549 IRTemp du0 = newTempV128();
4550 IRTemp du1 = newTempV128();
4551 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4552 assign(*u0, binop(halver, EX(du0), EX(du0)));
4553 assign(*u1, binop(halver, EX(du1), EX(du1)));
4557 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4558 static
4559 void math_DEINTERLEAVE3_64(
4560 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4561 UInt laneSzBlg2,
4562 IRTemp i0, IRTemp i1, IRTemp i2 )
4564 if (laneSzBlg2 == 3) {
4565 // 1x64, degenerate case
4566 assign(*u0, EX(i0));
4567 assign(*u1, EX(i1));
4568 assign(*u2, EX(i2));
4569 return;
4572 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4573 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4574 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4576 IRTemp di0 = newTempV128();
4577 IRTemp di1 = newTempV128();
4578 IRTemp di2 = newTempV128();
4579 assign(di0, binop(doubler, EX(i0), EX(i0)));
4580 assign(di1, binop(doubler, EX(i1), EX(i1)));
4581 assign(di2, binop(doubler, EX(i2), EX(i2)));
4582 IRTemp du0 = newTempV128();
4583 IRTemp du1 = newTempV128();
4584 IRTemp du2 = newTempV128();
4585 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4586 assign(*u0, binop(halver, EX(du0), EX(du0)));
4587 assign(*u1, binop(halver, EX(du1), EX(du1)));
4588 assign(*u2, binop(halver, EX(du2), EX(du2)));
4592 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4593 static
4594 void math_DEINTERLEAVE4_64(
4595 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4596 UInt laneSzBlg2,
4597 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4599 if (laneSzBlg2 == 3) {
4600 // 1x64, degenerate case
4601 assign(*u0, EX(i0));
4602 assign(*u1, EX(i1));
4603 assign(*u2, EX(i2));
4604 assign(*u3, EX(i3));
4605 return;
4608 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4609 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4610 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4612 IRTemp di0 = newTempV128();
4613 IRTemp di1 = newTempV128();
4614 IRTemp di2 = newTempV128();
4615 IRTemp di3 = newTempV128();
4616 assign(di0, binop(doubler, EX(i0), EX(i0)));
4617 assign(di1, binop(doubler, EX(i1), EX(i1)));
4618 assign(di2, binop(doubler, EX(i2), EX(i2)));
4619 assign(di3, binop(doubler, EX(i3), EX(i3)));
4620 IRTemp du0 = newTempV128();
4621 IRTemp du1 = newTempV128();
4622 IRTemp du2 = newTempV128();
4623 IRTemp du3 = newTempV128();
4624 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4625 laneSzBlg2 + 1, di0, di1, di2, di3);
4626 assign(*u0, binop(halver, EX(du0), EX(du0)));
4627 assign(*u1, binop(halver, EX(du1), EX(du1)));
4628 assign(*u2, binop(halver, EX(du2), EX(du2)));
4629 assign(*u3, binop(halver, EX(du3), EX(du3)));
4633 #undef EX
4634 #undef SL
4635 #undef ROR
4636 #undef ROL
4637 #undef SHR
4638 #undef SHL
4639 #undef ILO64x2
4640 #undef IHI64x2
4641 #undef ILO32x4
4642 #undef IHI32x4
4643 #undef ILO16x8
4644 #undef IHI16x8
4645 #undef ILO8x16
4646 #undef IHI8x16
4647 #undef CEV32x4
4648 #undef COD32x4
4649 #undef COD16x8
4650 #undef COD8x16
4651 #undef CEV8x16
4652 #undef AND
4653 #undef OR2
4654 #undef OR3
4655 #undef OR4
4658 /*------------------------------------------------------------*/
4659 /*--- Load and Store instructions ---*/
4660 /*------------------------------------------------------------*/
4662 /* Generate the EA for a "reg + reg" style amode. This is done from
4663    parts of the insn, but for sanity checking's sake it takes the whole
4664 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4665 and S=insn[12]:
4667 The possible forms, along with their opt:S values, are:
4668 011:0 Xn|SP + Xm
4669 111:0 Xn|SP + Xm
4670 011:1 Xn|SP + Xm * transfer_szB
4671 111:1 Xn|SP + Xm * transfer_szB
4672 010:0 Xn|SP + 32Uto64(Wm)
4673 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4674 110:0 Xn|SP + 32Sto64(Wm)
4675 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4677 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4678 the transfer size is insn[23,31,30]. For integer loads/stores,
4679 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4681 If the decoding fails, it returns IRTemp_INVALID.
4683    isInt is True iff this decoding is for transfers to/from integer
4684 registers. If False it is for transfers to/from vector registers.
4686 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4688 UInt optS = SLICE_UInt(insn, 15, 12);
4689 UInt mm = SLICE_UInt(insn, 20, 16);
4690 UInt nn = SLICE_UInt(insn, 9, 5);
4691 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4692 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4694 buf[0] = 0;
4696 /* Sanity checks, that this really is a load/store insn. */
4697 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4698 goto fail;
4700 if (isInt
4701 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4702 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4703 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4704 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4705 goto fail;
4707 if (!isInt
4708 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4709 goto fail;
4711 /* Throw out non-verified but possibly valid cases. */
4712 switch (szLg2) {
4713 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4714 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4715 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4716 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4717 case BITS3(1,0,0): // can only ever be valid for the vector case
4718 if (isInt) goto fail; else break;
4719 case BITS3(1,0,1): // these sizes are never valid
4720 case BITS3(1,1,0):
4721 case BITS3(1,1,1): goto fail;
4723 default: vassert(0);
4726 IRExpr* rhs = NULL;
4727 switch (optS) {
4728 case BITS4(1,1,1,0): goto fail; //ATC
4729 case BITS4(0,1,1,0):
4730 rhs = getIReg64orZR(mm);
4731 vex_sprintf(buf, "[%s, %s]",
4732 nameIReg64orZR(nn), nameIReg64orZR(mm));
4733 break;
4734 case BITS4(1,1,1,1): goto fail; //ATC
4735 case BITS4(0,1,1,1):
4736 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4737 vex_sprintf(buf, "[%s, %s lsl %u]",
4738 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4739 break;
4740 case BITS4(0,1,0,0):
4741 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4742 vex_sprintf(buf, "[%s, %s uxtx]",
4743 nameIReg64orZR(nn), nameIReg32orZR(mm));
4744 break;
4745 case BITS4(0,1,0,1):
4746 rhs = binop(Iop_Shl64,
4747 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4748 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4749 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4750 break;
4751 case BITS4(1,1,0,0):
4752 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4753 vex_sprintf(buf, "[%s, %s sxtx]",
4754 nameIReg64orZR(nn), nameIReg32orZR(mm));
4755 break;
4756 case BITS4(1,1,0,1):
4757 rhs = binop(Iop_Shl64,
4758 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4759 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4760 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4761 break;
4762 default:
4763 /* The rest appear to be genuinely invalid */
4764 goto fail;
4767 vassert(rhs);
4768 IRTemp res = newTemp(Ity_I64);
4769 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4770 return res;
4772 fail:
4773 if (0 /*really, sigill_diag, but that causes too much plumbing*/) {
4774 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4776 return IRTemp_INVALID;
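/* Illustrative only: a plain-C model of the EA forms gen_indexed_EA accepts,
   with the register values passed in directly.  It also models the two opt:S
   forms (111:0 and 111:1) that the decoder currently rejects as untested,
   since the comment above documents them; the helper itself is hypothetical
   and not used by the decoder.

      static ULong ref_indexed_EA ( Bool* ok, ULong xnOrSP, ULong xm,
                                    UInt optS, UInt szLg2 )
      {
         ULong idx = 0;
         *ok = True;
         switch (optS) {
            case BITS4(0,1,1,0): case BITS4(1,1,1,0):   // Xm
               idx = xm; break;
            case BITS4(0,1,1,1): case BITS4(1,1,1,1):   // Xm scaled by szB
               idx = xm << szLg2; break;
            case BITS4(0,1,0,0):                        // 32Uto64(Wm)
               idx = xm & 0xFFFFFFFFULL; break;
            case BITS4(0,1,0,1):
               idx = (xm & 0xFFFFFFFFULL) << szLg2; break;
            case BITS4(1,1,0,0):                        // 32Sto64(Wm)
               idx = (ULong)(Long)(Int)xm; break;
            case BITS4(1,1,0,1):
               idx = ((ULong)(Long)(Int)xm) << szLg2; break;
            default:
               *ok = False; return 0;
         }
         return xnOrSP + idx;
      }
*/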
4780 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4781 bits of DATAE :: Ity_I64. */
4782 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4784 IRExpr* addrE = mkexpr(addr);
4785 switch (szB) {
4786 case 8:
4787 storeLE(addrE, dataE);
4788 break;
4789 case 4:
4790 storeLE(addrE, unop(Iop_64to32, dataE));
4791 break;
4792 case 2:
4793 storeLE(addrE, unop(Iop_64to16, dataE));
4794 break;
4795 case 1:
4796 storeLE(addrE, unop(Iop_64to8, dataE));
4797 break;
4798 default:
4799 vassert(0);
4804 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4805 placing the result in an Ity_I64 temporary. */
4806 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4808 IRTemp res = newTemp(Ity_I64);
4809 IRExpr* addrE = mkexpr(addr);
4810 switch (szB) {
4811 case 8:
4812 assign(res, loadLE(Ity_I64,addrE));
4813 break;
4814 case 4:
4815 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4816 break;
4817 case 2:
4818 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4819 break;
4820 case 1:
4821 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4822 break;
4823 default:
4824 vassert(0);
4826 return res;
4830 /* Generate a "standard 7" name, from bitQ and size. But also
4831 allow ".1d" since that's occasionally useful. */
4832 static
4833 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4835 vassert(bitQ <= 1 && size <= 3);
4836 const HChar* nms[8]
4837 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4838 UInt ix = (bitQ << 2) | size;
4839 vassert(ix < 8);
4840 return nms[ix];
4844 static
4845 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
4846 const VexAbiInfo* abiinfo, Bool sigill_diag)
4848 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4850 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4851 /* uimm12 is scaled by the transfer size
4853 31 29 26 21 9 4
4854 | | | | | |
4855 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4856 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4858 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4859 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4861 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4862 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4864 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4865 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4867 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4868 UInt szLg2 = INSN(31,30);
4869 UInt szB = 1 << szLg2;
4870 Bool isLD = INSN(22,22) == 1;
4871 UInt offs = INSN(21,10) * szB;
4872 UInt nn = INSN(9,5);
4873 UInt tt = INSN(4,0);
4874 IRTemp ta = newTemp(Ity_I64);
4875 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4876 if (nn == 31) { /* FIXME generate stack alignment check */ }
4877 vassert(szLg2 < 4);
4878 if (isLD) {
4879 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4880 } else {
4881 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4883 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4884 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4885 DIP("%s %s, [%s, #%u]\n",
4886 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4887 nameIReg64orSP(nn), offs);
4888 return True;
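      /* Illustrative only: the uimm12 scaling rule above as a tiny,
         hypothetical helper, plus a worked example: with szLg2 == 3 and
         imm12 == 5 the byte offset is 5 << 3 == 40, so "ldr x1, [x2, #40]"
         encodes imm12 == 5.

            static ULong ref_uimm12_EA ( ULong xnOrSP, UInt imm12, UInt szLg2 )
            {
               return xnOrSP + ((ULong)imm12 << szLg2);
            }
      */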
4891 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4893 31 29 26 20 11 9 4
4894 | | | | | | |
4895       (at-Rn-then-Rn=EA)
4896 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4897 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4899 (at-EA-then-Rn=EA)
4900 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4901 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4903 (at-EA)
4904 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4905 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4907 simm9 is unscaled.
4909 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4910       load case this is because it would create two competing values for
4911 Rt. In the store case the reason is unclear, but the spec
4912 disallows it anyway.
4914 Stores are narrowing, loads are unsigned widening. sz encodes
4915 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4917 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4918 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4919 UInt szLg2 = INSN(31,30);
4920 UInt szB = 1 << szLg2;
4921 Bool isLoad = INSN(22,22) == 1;
4922 UInt imm9 = INSN(20,12);
4923 UInt nn = INSN(9,5);
4924 UInt tt = INSN(4,0);
4925 Bool wBack = INSN(10,10) == 1;
4926 UInt how = INSN(11,10);
4927 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4928 /* undecodable; fall through */
4929 } else {
4930 if (nn == 31) { /* FIXME generate stack alignment check */ }
4932 // Compute the transfer address TA and the writeback address WA.
4933 IRTemp tRN = newTemp(Ity_I64);
4934 assign(tRN, getIReg64orSP(nn));
4935 IRTemp tEA = newTemp(Ity_I64);
4936 Long simm9 = (Long)sx_to_64(imm9, 9);
4937 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4939 IRTemp tTA = newTemp(Ity_I64);
4940 IRTemp tWA = newTemp(Ity_I64);
4941 switch (how) {
4942 case BITS2(0,1):
4943 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4944 case BITS2(1,1):
4945 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4946 case BITS2(0,0):
4947 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4948 default:
4949 vassert(0); /* NOTREACHED */
4952 /* Normally rN would be updated after the transfer. However, in
4953             the special cases typified by
4954 str x30, [sp,#-16]!
4955 str w1, [sp,#-32]!
4956 it is necessary to update SP before the transfer, (1)
4957 because Memcheck will otherwise complain about a write
4958 below the stack pointer, and (2) because the segfault
4959 stack extension mechanism will otherwise extend the stack
4960 only down to SP before the instruction, which might not be
4961             far enough, if the -16/-32 offset takes the actual access
4962 address to the next page.
4964 Bool earlyWBack
4965 = wBack && simm9 < 0 && (szB == 8 || szB == 4)
4966 && how == BITS2(1,1) && nn == 31 && !isLoad;
4968 if (wBack && earlyWBack)
4969 putIReg64orSP(nn, mkexpr(tEA));
4971 if (isLoad) {
4972 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4973 } else {
4974 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4977 if (wBack && !earlyWBack)
4978 putIReg64orSP(nn, mkexpr(tEA));
4980 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4981 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4982 const HChar* fmt_str = NULL;
4983 switch (how) {
4984 case BITS2(0,1):
4985 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4986 break;
4987 case BITS2(1,1):
4988 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4989 break;
4990 case BITS2(0,0):
4991 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4992 break;
4993 default:
4994 vassert(0);
4996 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4997 nameIRegOrZR(szB == 8, tt),
4998 nameIReg64orSP(nn), simm9);
4999 return True;
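      /* Illustrative only: a plain-C summary of the three simm9 addressing
         variants handled above (hypothetical helper, not used by the
         decoder).  It returns the transfer address and reports whether, and
         with what value, Rn is written back; the early-writeback refinement
         for SP-relative stores is an ordering concern and is not modelled.

            static ULong ref_simm9_xfer_addr ( ULong rn, Long simm9, UInt how,
                                               Bool* doWback, ULong* wbackVal )
            {
               ULong ea = rn + (ULong)simm9;
               switch (how) {
                  case BITS2(0,1):             // post-index: use Rn, Rn := EA
                     *doWback = True;  *wbackVal = ea; return rn;
                  case BITS2(1,1):             // pre-index: use EA, Rn := EA
                     *doWback = True;  *wbackVal = ea; return ea;
                  case BITS2(0,0):             // no writeback: use EA
                     *doWback = False; return ea;
                  default:
                     vassert(0);
               }
               // not reached
               return 0;
            }
      */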
5003 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
5004 /* L==1 => mm==LD
5005 L==0 => mm==ST
5006 x==0 => 32 bit transfers, and zero extended loads
5007 x==1 => 64 bit transfers
5008 simm7 is scaled by the (single-register) transfer size
5010 (at-Rn-then-Rn=EA)
5011 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
5013 (at-EA-then-Rn=EA)
5014 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
5016 (at-EA)
5017 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
5019 UInt insn_30_23 = INSN(30,23);
5020 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
5021 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
5022 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
5023 UInt bL = INSN(22,22);
5024 UInt bX = INSN(31,31);
5025 UInt bWBack = INSN(23,23);
5026 UInt rT1 = INSN(4,0);
5027 UInt rN = INSN(9,5);
5028 UInt rT2 = INSN(14,10);
5029 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5030 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5031 || (bL && rT1 == rT2)) {
5032 /* undecodable; fall through */
5033 } else {
5034 if (rN == 31) { /* FIXME generate stack alignment check */ }
5036 // Compute the transfer address TA and the writeback address WA.
5037 IRTemp tRN = newTemp(Ity_I64);
5038 assign(tRN, getIReg64orSP(rN));
5039 IRTemp tEA = newTemp(Ity_I64);
5040 simm7 = (bX ? 8 : 4) * simm7;
5041 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5043 IRTemp tTA = newTemp(Ity_I64);
5044 IRTemp tWA = newTemp(Ity_I64);
5045 switch (INSN(24,23)) {
5046 case BITS2(0,1):
5047 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5048 case BITS2(1,1):
5049 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5050 case BITS2(1,0):
5051 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5052 default:
5053 vassert(0); /* NOTREACHED */
5056 /* Normally rN would be updated after the transfer. However, in
5057             the special case typified by
5058 stp x29, x30, [sp,#-112]!
5059 it is necessary to update SP before the transfer, (1)
5060 because Memcheck will otherwise complain about a write
5061 below the stack pointer, and (2) because the segfault
5062 stack extension mechanism will otherwise extend the stack
5063 only down to SP before the instruction, which might not be
5064             far enough, if the -112 offset takes the actual access
5065 address to the next page.
5067 Bool earlyWBack
5068 = bWBack && simm7 < 0
5069 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
5071 if (bWBack && earlyWBack)
5072 putIReg64orSP(rN, mkexpr(tEA));
5074 /**/ if (bL == 1 && bX == 1) {
5075 // 64 bit load
5076 putIReg64orZR(rT1, loadLE(Ity_I64,
5077 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
5078 putIReg64orZR(rT2, loadLE(Ity_I64,
5079 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
5080 } else if (bL == 1 && bX == 0) {
5081 // 32 bit load
5082 putIReg32orZR(rT1, loadLE(Ity_I32,
5083 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
5084 putIReg32orZR(rT2, loadLE(Ity_I32,
5085 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
5086 } else if (bL == 0 && bX == 1) {
5087 // 64 bit store
5088 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
5089 getIReg64orZR(rT1));
5090 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
5091 getIReg64orZR(rT2));
5092 } else {
5093 vassert(bL == 0 && bX == 0);
5094 // 32 bit store
5095 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
5096 getIReg32orZR(rT1));
5097 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
5098 getIReg32orZR(rT2));
5101 if (bWBack && !earlyWBack)
5102 putIReg64orSP(rN, mkexpr(tEA));
5104 const HChar* fmt_str = NULL;
5105 switch (INSN(24,23)) {
5106 case BITS2(0,1):
5107 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5108 break;
5109 case BITS2(1,1):
5110 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5111 break;
5112 case BITS2(1,0):
5113 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5114 break;
5115 default:
5116 vassert(0);
5118 DIP(fmt_str, bL == 0 ? "st" : "ld",
5119 nameIRegOrZR(bX == 1, rT1),
5120 nameIRegOrZR(bX == 1, rT2),
5121 nameIReg64orSP(rN), simm7);
5122 return True;
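      /* Illustrative only: the address arithmetic for the pair transfers
         above, as a small hypothetical sketch.  The raw 7-bit immediate is
         sign-extended and scaled by the single-register size, and the second
         register goes at EA plus that size; for example
         "stp x29, x30, [sp, #-112]!" has a raw immediate of -14.

            static void ref_pair_addrs ( ULong* ea1, ULong* ea2,
                                         ULong base, Long simm7raw, Bool is64 )
            {
               Long scale = is64 ? 8 : 4;
               ULong ea   = base + (ULong)(simm7raw * scale);
               *ea1 = ea;
               *ea2 = ea + (ULong)scale;
            }
      */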
5126 /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
5127 /* Does 32 bit transfers which are sign extended to 64 bits.
5128 simm7 is scaled by the (single-register) transfer size
5130 (at-Rn-then-Rn=EA)
5131 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
5133 (at-EA-then-Rn=EA)
5134 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
5136 (at-EA)
5137 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
5139 UInt insn_31_22 = INSN(31,22);
5140 if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
5141 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5142 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5143 UInt bWBack = INSN(23,23);
5144 UInt rT1 = INSN(4,0);
5145 UInt rN = INSN(9,5);
5146 UInt rT2 = INSN(14,10);
5147 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5148 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5149 || (rT1 == rT2)) {
5150 /* undecodable; fall through */
5151 } else {
5152 if (rN == 31) { /* FIXME generate stack alignment check */ }
5154 // Compute the transfer address TA and the writeback address WA.
5155 IRTemp tRN = newTemp(Ity_I64);
5156 assign(tRN, getIReg64orSP(rN));
5157 IRTemp tEA = newTemp(Ity_I64);
5158 simm7 = 4 * simm7;
5159 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5161 IRTemp tTA = newTemp(Ity_I64);
5162 IRTemp tWA = newTemp(Ity_I64);
5163 switch (INSN(24,23)) {
5164 case BITS2(0,1):
5165 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5166 case BITS2(1,1):
5167 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5168 case BITS2(1,0):
5169 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5170 default:
5171 vassert(0); /* NOTREACHED */
5174 // 32 bit load, sign extended to 64 bits
5175 putIReg64orZR(rT1, unop(Iop_32Sto64,
5176 loadLE(Ity_I32, binop(Iop_Add64,
5177 mkexpr(tTA),
5178 mkU64(0)))));
5179 putIReg64orZR(rT2, unop(Iop_32Sto64,
5180 loadLE(Ity_I32, binop(Iop_Add64,
5181 mkexpr(tTA),
5182 mkU64(4)))));
5183 if (bWBack)
5184 putIReg64orSP(rN, mkexpr(tEA));
5186 const HChar* fmt_str = NULL;
5187 switch (INSN(24,23)) {
5188 case BITS2(0,1):
5189 fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5190 break;
5191 case BITS2(1,1):
5192 fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5193 break;
5194 case BITS2(1,0):
5195 fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
5196 break;
5197 default:
5198 vassert(0);
5200 DIP(fmt_str, nameIReg64orZR(rT1),
5201 nameIReg64orZR(rT2),
5202 nameIReg64orSP(rN), simm7);
5203 return True;
5207 /* ---------------- LDR (literal, int reg) ---------------- */
5208 /* 31 29 23 4
5209 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5210 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5211 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5212 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5213 Just handles the first two cases for now.
5215 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5216 UInt imm19 = INSN(23,5);
5217 UInt rT = INSN(4,0);
5218 UInt bX = INSN(30,30);
5219 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5220 if (bX) {
5221 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5222 } else {
5223 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5225 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5226 return True;
5229 /* -------------- {LD,ST}R (integer register) --------------- */
5230 /* 31 29 20 15 12 11 9 4
5231 | | | | | | | |
5232 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5233 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5234 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5235 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5237 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5238 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5239 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5240 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5242 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5243 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5244 HChar dis_buf[64];
5245 UInt szLg2 = INSN(31,30);
5246 Bool isLD = INSN(22,22) == 1;
5247 UInt tt = INSN(4,0);
5248 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5249 if (ea != IRTemp_INVALID) {
5250 switch (szLg2) {
5251 case 3: /* 64 bit */
5252 if (isLD) {
5253 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5254 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5255 } else {
5256 storeLE(mkexpr(ea), getIReg64orZR(tt));
5257 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5259 break;
5260 case 2: /* 32 bit */
5261 if (isLD) {
5262 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5263 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5264 } else {
5265 storeLE(mkexpr(ea), getIReg32orZR(tt));
5266 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5268 break;
5269 case 1: /* 16 bit */
5270 if (isLD) {
5271 putIReg64orZR(tt, unop(Iop_16Uto64,
5272 loadLE(Ity_I16, mkexpr(ea))));
5273 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5274 } else {
5275 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5276 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5278 break;
5279 case 0: /* 8 bit */
5280 if (isLD) {
5281 putIReg64orZR(tt, unop(Iop_8Uto64,
5282 loadLE(Ity_I8, mkexpr(ea))));
5283 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5284 } else {
5285 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5286 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5288 break;
5289 default:
5290 vassert(0);
5292 return True;
5296 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5297 /* 31 29 26 23 21 9 4
5298 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5299 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5300 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5301 where
5302 Rt is Wt when x==1, Xt when x==0
5304 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5305 /* Further checks on bits 31:30 and 22 */
5306 Bool valid = False;
5307 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5308 case BITS3(1,0,0):
5309 case BITS3(0,1,0): case BITS3(0,1,1):
5310 case BITS3(0,0,0): case BITS3(0,0,1):
5311 valid = True;
5312 break;
5314 if (valid) {
5315 UInt szLg2 = INSN(31,30);
5316 UInt bitX = INSN(22,22);
5317 UInt imm12 = INSN(21,10);
5318 UInt nn = INSN(9,5);
5319 UInt tt = INSN(4,0);
5320 UInt szB = 1 << szLg2;
5321 IRExpr* ea = binop(Iop_Add64,
5322 getIReg64orSP(nn), mkU64(imm12 * szB));
5323 switch (szB) {
5324 case 4:
5325 vassert(bitX == 0);
5326 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5327 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5328 nameIReg64orSP(nn), imm12 * szB);
5329 break;
5330 case 2:
5331 if (bitX == 1) {
5332 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5333 } else {
5334 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5336 DIP("ldrsh %s, [%s, #%u]\n",
5337 nameIRegOrZR(bitX == 0, tt),
5338 nameIReg64orSP(nn), imm12 * szB);
5339 break;
5340 case 1:
5341 if (bitX == 1) {
5342 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5343 } else {
5344 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5346 DIP("ldrsb %s, [%s, #%u]\n",
5347 nameIRegOrZR(bitX == 0, tt),
5348 nameIReg64orSP(nn), imm12 * szB);
5349 break;
5350 default:
5351 vassert(0);
5353 return True;
5355 /* else fall through */
5358 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5359 /* (at-Rn-then-Rn=EA)
5360 31 29 23 21 20 11 9 4
5361 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5362 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5363 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5365 (at-EA-then-Rn=EA)
5366 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5367 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5368 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5369 where
5370 Rt is Wt when x==1, Xt when x==0
5371 transfer-at-Rn when [11]==0, at EA when [11]==1
5373 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5374 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5375 /* Further checks on bits 31:30 and 22 */
5376 Bool valid = False;
5377 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5378 case BITS3(1,0,0): // LDRSW Xt
5379 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5380 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5381 valid = True;
5382 break;
5384 if (valid) {
5385 UInt szLg2 = INSN(31,30);
5386 UInt imm9 = INSN(20,12);
5387 Bool atRN = INSN(11,11) == 0;
5388 UInt nn = INSN(9,5);
5389 UInt tt = INSN(4,0);
5390 IRTemp tRN = newTemp(Ity_I64);
5391 IRTemp tEA = newTemp(Ity_I64);
5392 IRTemp tTA = IRTemp_INVALID;
5393 ULong simm9 = sx_to_64(imm9, 9);
5394 Bool is64 = INSN(22,22) == 0;
5395 assign(tRN, getIReg64orSP(nn));
5396 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5397 tTA = atRN ? tRN : tEA;
5398 HChar ch = '?';
5399 /* There are 5 cases:
5400 byte load, SX to 64
5401 byte load, SX to 32, ZX to 64
5402 halfword load, SX to 64
5403 halfword load, SX to 32, ZX to 64
5404 word load, SX to 64
5405 The ifs below handle them in the listed order.
5407 if (szLg2 == 0) {
5408 ch = 'b';
5409 if (is64) {
5410 putIReg64orZR(tt, unop(Iop_8Sto64,
5411 loadLE(Ity_I8, mkexpr(tTA))));
5412 } else {
5413 putIReg32orZR(tt, unop(Iop_8Sto32,
5414 loadLE(Ity_I8, mkexpr(tTA))));
5417 else if (szLg2 == 1) {
5418 ch = 'h';
5419 if (is64) {
5420 putIReg64orZR(tt, unop(Iop_16Sto64,
5421 loadLE(Ity_I16, mkexpr(tTA))));
5422 } else {
5423 putIReg32orZR(tt, unop(Iop_16Sto32,
5424 loadLE(Ity_I16, mkexpr(tTA))));
5427 else if (szLg2 == 2 && is64) {
5428 ch = 'w';
5429 putIReg64orZR(tt, unop(Iop_32Sto64,
5430 loadLE(Ity_I32, mkexpr(tTA))));
5432 else {
5433 vassert(0);
5435 putIReg64orSP(nn, mkexpr(tEA));
5436 DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!\n",
5437 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5438 return True;
5440 /* else fall through */
5443 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5444 /* 31 29 23 21 20 11 9 4
5445 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5446 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5447 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5448 where
5449 Rt is Wt when x==1, Xt when x==0
5451 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5452 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5453 /* Further checks on bits 31:30 and 22 */
5454 Bool valid = False;
5455 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5456 case BITS3(1,0,0): // LDURSW Xt
5457 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5458 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5459 valid = True;
5460 break;
5462 if (valid) {
5463 UInt szLg2 = INSN(31,30);
5464 UInt imm9 = INSN(20,12);
5465 UInt nn = INSN(9,5);
5466 UInt tt = INSN(4,0);
5467 IRTemp tRN = newTemp(Ity_I64);
5468 IRTemp tEA = newTemp(Ity_I64);
5469 ULong simm9 = sx_to_64(imm9, 9);
5470 Bool is64 = INSN(22,22) == 0;
5471 assign(tRN, getIReg64orSP(nn));
5472 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5473 HChar ch = '?';
5474 /* There are 5 cases:
5475 byte load, SX to 64
5476 byte load, SX to 32, ZX to 64
5477 halfword load, SX to 64
5478 halfword load, SX to 32, ZX to 64
5479 word load, SX to 64
5480 The ifs below handle them in the listed order.
5482 if (szLg2 == 0) {
5483 ch = 'b';
5484 if (is64) {
5485 putIReg64orZR(tt, unop(Iop_8Sto64,
5486 loadLE(Ity_I8, mkexpr(tEA))));
5487 } else {
5488 putIReg32orZR(tt, unop(Iop_8Sto32,
5489 loadLE(Ity_I8, mkexpr(tEA))));
5492 else if (szLg2 == 1) {
5493 ch = 'h';
5494 if (is64) {
5495 putIReg64orZR(tt, unop(Iop_16Sto64,
5496 loadLE(Ity_I16, mkexpr(tEA))));
5497 } else {
5498 putIReg32orZR(tt, unop(Iop_16Sto32,
5499 loadLE(Ity_I16, mkexpr(tEA))));
5502 else if (szLg2 == 2 && is64) {
5503 ch = 'w';
5504 putIReg64orZR(tt, unop(Iop_32Sto64,
5505 loadLE(Ity_I32, mkexpr(tEA))));
5507 else {
5508 vassert(0);
5510 DIP("ldurs%c %s, [%s, #%lld]\n",
5511 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5512 return True;
5514 /* else fall through */
5517 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5518 /* L==1 => mm==LD
5519 L==0 => mm==ST
5520 sz==00 => 32 bit (S) transfers
5521 sz==01 => 64 bit (D) transfers
5522 sz==10 => 128 bit (Q) transfers
5523 sz==11 isn't allowed
5524 simm7 is scaled by the (single-register) transfer size
5526 31 29 26 22 21 14 9 4
5528 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5529 (at-EA, with nontemporal hint)
5531 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5532 (at-Rn-then-Rn=EA)
5534 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5535 (at-EA)
5537 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5538 (at-EA-then-Rn=EA)
5540 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5541 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5542 Bool isLD = INSN(22,22) == 1;
5543 Bool wBack = INSN(23,23) == 1;
5544 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5545 UInt tt2 = INSN(14,10);
5546 UInt nn = INSN(9,5);
5547 UInt tt1 = INSN(4,0);
5548 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5549 /* undecodable; fall through */
5550 } else {
5551 if (nn == 31) { /* FIXME generate stack alignment check */ }
5553 // Compute the transfer address TA and the writeback address WA.
5554 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5555 IRTemp tRN = newTemp(Ity_I64);
5556 assign(tRN, getIReg64orSP(nn));
5557 IRTemp tEA = newTemp(Ity_I64);
5558 simm7 = szB * simm7;
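/* simm7 is now a byte offset; e.g. with Q registers (szB == 16) an encoded imm7 of -32 gives -512, matching the stp q0, q1, [sp,#-512]! example below. */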
5559 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5561 IRTemp tTA = newTemp(Ity_I64);
5562 IRTemp tWA = newTemp(Ity_I64);
5563 switch (INSN(24,23)) {
5564 case BITS2(0,1):
5565 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5566 case BITS2(1,1):
5567 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5568 case BITS2(1,0):
5569 case BITS2(0,0):
5570 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5571 default:
5572 vassert(0); /* NOTREACHED */
5575 IRType ty = Ity_INVALID;
5576 switch (szB) {
5577 case 4: ty = Ity_F32; break;
5578 case 8: ty = Ity_F64; break;
5579 case 16: ty = Ity_V128; break;
5580 default: vassert(0);
5583 /* Normally rN would be updated after the transfer. However, in
5584 the special cases typified by
5585 stp q0, q1, [sp,#-512]!
5586 stp d0, d1, [sp,#-512]!
5587 stp s0, s1, [sp,#-512]!
5588 it is necessary to update SP before the transfer, (1)
5589 because Memcheck will otherwise complain about a write
5590 below the stack pointer, and (2) because the segfault
5591 stack extension mechanism will otherwise extend the stack
5592 only down to SP before the instruction, which might not be
5593 far enough, if the -512 bit takes the actual access
5594 address to the next page.
5596 Bool earlyWBack
5597 = wBack && simm7 < 0
5598 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5600 if (wBack && earlyWBack)
5601 putIReg64orSP(nn, mkexpr(tEA));
5603 if (isLD) {
5604 if (szB < 16) {
5605 putQReg128(tt1, mkV128(0x0000));
5607 putQRegLO(tt1,
5608 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5609 if (szB < 16) {
5610 putQReg128(tt2, mkV128(0x0000));
5612 putQRegLO(tt2,
5613 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5614 } else {
5615 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5616 getQRegLO(tt1, ty));
5617 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5618 getQRegLO(tt2, ty));
5621 if (wBack && !earlyWBack)
5622 putIReg64orSP(nn, mkexpr(tEA));
5624 const HChar* fmt_str = NULL;
5625 switch (INSN(24,23)) {
5626 case BITS2(0,1):
5627 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5628 break;
5629 case BITS2(1,1):
5630 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5631 break;
5632 case BITS2(1,0):
5633 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5634 break;
5635 case BITS2(0,0):
5636 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5637 break;
5638 default:
5639 vassert(0);
5641 DIP(fmt_str, isLD ? "ld" : "st",
5642 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5643 nameIReg64orSP(nn), simm7);
5644 return True;
5648 /* -------------- {LD,ST}R (vector register) --------------- */
5649 /* 31 29 23 20 15 12 11 9 4
5650 | | | | | | | | |
5651 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5652 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5653 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5654 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5655 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5657 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5658 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5659 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5660 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5661 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5663 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5664 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5665 HChar dis_buf[64];
5666 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5667 Bool isLD = INSN(22,22) == 1;
5668 UInt tt = INSN(4,0);
5669 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5670 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5671 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5672 switch (szLg2) {
5673 case 0: /* 8 bit */
5674 if (isLD) {
5675 putQReg128(tt, mkV128(0x0000));
5676 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5677 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5678 } else {
5679 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5680 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5682 break;
5683 case 1:
5684 if (isLD) {
5685 putQReg128(tt, mkV128(0x0000));
5686 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5687 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5688 } else {
5689 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5690 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5692 break;
5693 case 2: /* 32 bit */
5694 if (isLD) {
5695 putQReg128(tt, mkV128(0x0000));
5696 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5697 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5698 } else {
5699 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5700 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5702 break;
5703 case 3: /* 64 bit */
5704 if (isLD) {
5705 putQReg128(tt, mkV128(0x0000));
5706 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5707 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5708 } else {
5709 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5710 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5712 break;
5713 case 4:
5714 if (isLD) {
5715 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5716 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5717 } else {
5718 storeLE(mkexpr(ea), getQReg128(tt));
5719 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5721 break;
5722 default:
5723 vassert(0);
5725 return True;
5727 after_LDR_STR_vector_register:
5729 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5730 /* 31 29 22 20 15 12 11 9 4
5731 | | | | | | | | |
5732 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5734 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5735 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5737 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5738 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5740 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5741 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5742 HChar dis_buf[64];
5743 UInt szLg2 = INSN(31,30);
5744 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5745 UInt tt = INSN(4,0);
5746 if (szLg2 == 3) goto after_LDRS_integer_register;
5747 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5748 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5749 /* Enumerate the 5 variants explicitly. */
5750 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5751 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5752 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5753 return True;
5755 else
5756 if (szLg2 == 1/*16 bit*/) {
5757 if (sxTo64) {
5758 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5759 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5760 } else {
5761 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5762 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5764 return True;
5766 else
5767 if (szLg2 == 0/*8 bit*/) {
5768 if (sxTo64) {
5769 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5770 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5771 } else {
5772 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5773 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5775 return True;
5777 /* else it's an invalid combination */
5779 after_LDRS_integer_register:
5781 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5782 /* This is the Unsigned offset variant only. The Post-Index and
5783 Pre-Index variants are below.
5785 31 29 23 21 9 4
5786 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5787 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5788 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5789 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5790 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5792 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5793 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5794 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5795 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5796 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5798 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5799 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5800 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5801 Bool isLD = INSN(22,22) == 1;
5802 UInt pimm12 = INSN(21,10) << szLg2;
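/* The 12-bit immediate is scaled by the transfer size, so e.g. a Q-register access (szLg2 == 4) can reach byte offsets up to 4095 * 16. */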
5803 UInt nn = INSN(9,5);
5804 UInt tt = INSN(4,0);
5805 IRTemp tEA = newTemp(Ity_I64);
5806 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5807 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5808 if (isLD) {
5809 if (szLg2 < 4) {
5810 putQReg128(tt, mkV128(0x0000));
5812 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5813 } else {
5814 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5816 DIP("%s %s, [%s, #%u]\n",
5817 isLD ? "ldr" : "str",
5818 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5819 return True;
5822 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5823 /* These are the Post-Index and Pre-Index variants.
5825 31 29 23 20 11 9 4
5826 (at-Rn-then-Rn=EA)
5827 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5828 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5829 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5830 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5831 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5833 (at-EA-then-Rn=EA)
5834 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5835 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5836 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5837 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5838 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5840 Stores are the same except with bit 22 set to 0.
5842 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5843 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5844 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5845 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5846 Bool isLD = INSN(22,22) == 1;
5847 UInt imm9 = INSN(20,12);
5848 Bool atRN = INSN(11,11) == 0;
5849 UInt nn = INSN(9,5);
5850 UInt tt = INSN(4,0);
5851 IRTemp tRN = newTemp(Ity_I64);
5852 IRTemp tEA = newTemp(Ity_I64);
5853 IRTemp tTA = IRTemp_INVALID;
5854 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5855 ULong simm9 = sx_to_64(imm9, 9);
5856 assign(tRN, getIReg64orSP(nn));
5857 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5858 tTA = atRN ? tRN : tEA;
5860 /* Do early writeback for the cases typified by
5861 str d8, [sp, #-32]!
5862 str d10, [sp, #-128]!
5863 str q1, [sp, #-32]!
5864 for the same reasons as described in a similar comment in the
5865 "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
5867 Bool earlyWBack
5868 = !atRN && !isLD && (ty == Ity_F64 || ty == Ity_V128)
5869 && nn == 31 && ((Long)simm9) < 0;
5871 if (earlyWBack)
5872 putIReg64orSP(nn, mkexpr(tEA));
5874 if (isLD) {
5875 if (szLg2 < 4) {
5876 putQReg128(tt, mkV128(0x0000));
5878 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5879 } else {
5880 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5883 if (!earlyWBack)
5884 putIReg64orSP(nn, mkexpr(tEA));
5886 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5887 isLD ? "ldr" : "str",
5888 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5889 return True;
5892 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5893 /* 31 29 23 20 11 9 4
5894 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
5895 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
5896 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
5897 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
5898 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
5900 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
5901 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
5902 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
5903 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
5904 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
5906 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5907 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5908 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5909 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5910 Bool isLD = INSN(22,22) == 1;
5911 UInt imm9 = INSN(20,12);
5912 UInt nn = INSN(9,5);
5913 UInt tt = INSN(4,0);
5914 ULong simm9 = sx_to_64(imm9, 9);
5915 IRTemp tEA = newTemp(Ity_I64);
5916 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5917 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5918 if (isLD) {
5919 if (szLg2 < 4) {
5920 putQReg128(tt, mkV128(0x0000));
5922 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5923 } else {
5924 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5926 DIP("%s %s, [%s, #%lld]\n",
5927 isLD ? "ldur" : "stur",
5928 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5929 return True;
5932 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5933 /* 31 29 23 4
5934 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5935 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5936 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5938 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5939 UInt szB = 4 << INSN(31,30);
5940 UInt imm19 = INSN(23,5);
5941 UInt tt = INSN(4,0);
5942 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
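/* imm19 is a signed word offset, so the literal must lie within +/- 1MB of this instruction. */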
5943 IRType ty = preferredVectorSubTypeFromSize(szB);
5944 putQReg128(tt, mkV128(0x0000));
5945 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5946 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5947 return True;
5950 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */
5951 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5952 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5953 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5954 /* 31 29 26 22 21 20 15 11 9 4
5956 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5957 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5959 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5960 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5962 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5963 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5965 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
5966 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
5968 T = defined by Q and sz in the normal way
5969 step = if m == 11111 then transfer-size else Xm
5970 xx = case L of 1 -> LD ; 0 -> ST
5972 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5973 && INSN(21,21) == 0) {
5974 Bool bitQ = INSN(30,30);
5975 Bool isPX = INSN(23,23) == 1;
5976 Bool isLD = INSN(22,22) == 1;
5977 UInt mm = INSN(20,16);
5978 UInt opc = INSN(15,12);
5979 UInt sz = INSN(11,10);
5980 UInt nn = INSN(9,5);
5981 UInt tt = INSN(4,0);
5982 Bool isQ = bitQ == 1;
5983 Bool is1d = sz == BITS2(1,1) && !isQ;
5984 UInt nRegs = 0;
5985 switch (opc) {
5986 case BITS4(0,0,0,0): nRegs = 4; break;
5987 case BITS4(0,1,0,0): nRegs = 3; break;
5988 case BITS4(1,0,0,0): nRegs = 2; break;
5989 case BITS4(0,1,1,1): nRegs = 1; break;
5990 default: break;
5993 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5994 If we see it, set nRegs to 0 so as to cause the next conditional
5995 to fail. */
5996 if (!isPX && mm != 0)
5997 nRegs = 0;
5999 if (nRegs == 1 /* .1d is allowed */
6000 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
6002 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
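/* Total bytes moved; e.g. ld4 {v0.16b .. v3.16b}, [x0] transfers 4 * 16 == 64 bytes. */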
6004 /* Generate the transfer address (TA) and if necessary the
6005 writeback address (WB) */
6006 IRTemp tTA = newTemp(Ity_I64);
6007 assign(tTA, getIReg64orSP(nn));
6008 if (nn == 31) { /* FIXME generate stack alignment check */ }
6009 IRTemp tWB = IRTemp_INVALID;
6010 if (isPX) {
6011 tWB = newTemp(Ity_I64);
6012 assign(tWB, binop(Iop_Add64,
6013 mkexpr(tTA),
6014 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6015 : getIReg64orZR(mm)));
6018 /* -- BEGIN generate the transfers -- */
6020 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
6021 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
6022 switch (nRegs) {
6023 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
6024 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
6025 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
6026 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
6027 default: vassert(0);
6030 /* -- Multiple 128 or 64 bit stores -- */
6031 if (!isLD) {
6032 switch (nRegs) {
6033 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6034 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6035 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
6036 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
6037 default: vassert(0);
6039 switch (nRegs) {
6040 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
6041 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
6042 break;
6043 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
6044 (&i0, &i1, &i2, sz, u0, u1, u2);
6045 break;
6046 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
6047 (&i0, &i1, sz, u0, u1);
6048 break;
6049 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
6050 (&i0, sz, u0);
6051 break;
6052 default: vassert(0);
6054 # define MAYBE_NARROW_TO_64(_expr) \
6055 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6056 UInt step = isQ ? 16 : 8;
6057 switch (nRegs) {
6058 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6059 MAYBE_NARROW_TO_64(mkexpr(i3)) );
6060 /* fallthru */
6061 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6062 MAYBE_NARROW_TO_64(mkexpr(i2)) );
6063 /* fallthru */
6064 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6065 MAYBE_NARROW_TO_64(mkexpr(i1)) );
6066 /* fallthru */
6067 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6068 MAYBE_NARROW_TO_64(mkexpr(i0)) );
6069 break;
6070 default: vassert(0);
6072 # undef MAYBE_NARROW_TO_64
6075 /* -- Multiple 128 or 64 bit loads -- */
6076 else /* isLD */ {
6077 UInt step = isQ ? 16 : 8;
6078 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6079 # define MAYBE_WIDEN_FROM_64(_expr) \
6080 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6081 switch (nRegs) {
6082 case 4:
6083 assign(i3, MAYBE_WIDEN_FROM_64(
6084 loadLE(loadTy,
6085 binop(Iop_Add64, mkexpr(tTA),
6086 mkU64(3 * step)))));
6087 /* fallthru */
6088 case 3:
6089 assign(i2, MAYBE_WIDEN_FROM_64(
6090 loadLE(loadTy,
6091 binop(Iop_Add64, mkexpr(tTA),
6092 mkU64(2 * step)))));
6093 /* fallthru */
6094 case 2:
6095 assign(i1, MAYBE_WIDEN_FROM_64(
6096 loadLE(loadTy,
6097 binop(Iop_Add64, mkexpr(tTA),
6098 mkU64(1 * step)))));
6099 /* fallthru */
6100 case 1:
6101 assign(i0, MAYBE_WIDEN_FROM_64(
6102 loadLE(loadTy,
6103 binop(Iop_Add64, mkexpr(tTA),
6104 mkU64(0 * step)))));
6105 break;
6106 default:
6107 vassert(0);
6109 # undef MAYBE_WIDEN_FROM_64
6110 switch (nRegs) {
6111 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
6112 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
6113 break;
6114 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
6115 (&u0, &u1, &u2, sz, i0, i1, i2);
6116 break;
6117 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
6118 (&u0, &u1, sz, i0, i1);
6119 break;
6120 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
6121 (&u0, sz, i0);
6122 break;
6123 default: vassert(0);
6125 switch (nRegs) {
6126 case 4: putQReg128( (tt+3) % 32,
6127 math_MAYBE_ZERO_HI64(bitQ, u3));
6128 /* fallthru */
6129 case 3: putQReg128( (tt+2) % 32,
6130 math_MAYBE_ZERO_HI64(bitQ, u2));
6131 /* fallthru */
6132 case 2: putQReg128( (tt+1) % 32,
6133 math_MAYBE_ZERO_HI64(bitQ, u1));
6134 /* fallthru */
6135 case 1: putQReg128( (tt+0) % 32,
6136 math_MAYBE_ZERO_HI64(bitQ, u0));
6137 break;
6138 default: vassert(0);
6142 /* -- END generate the transfers -- */
6144 /* Do the writeback, if necessary */
6145 if (isPX) {
6146 putIReg64orSP(nn, mkexpr(tWB));
6149 HChar pxStr[20];
6150 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6151 if (isPX) {
6152 if (mm == BITS5(1,1,1,1,1))
6153 vex_sprintf(pxStr, ", #%u", xferSzB);
6154 else
6155 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6157 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6158 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6159 isLD ? "ld" : "st", nRegs,
6160 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6161 pxStr);
6163 if (nRegs >= 3) {
6164 dres->hint = Dis_HintVerbose;
6166 return True;
6168 /* else fall through */
6171 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
6172 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
6173 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
6174 /* 31 29 26 22 21 20 15 11 9 4
6176 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6177 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6179 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6180 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6182 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6183 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6185 T = defined by Q and sz in the normal way
6186 step = if m == 11111 then transfer-size else Xm
6187 xx = case L of 1 -> LD ; 0 -> ST
6189 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6190 && INSN(21,21) == 0) {
6191 Bool bitQ = INSN(30,30);
6192 Bool isPX = INSN(23,23) == 1;
6193 Bool isLD = INSN(22,22) == 1;
6194 UInt mm = INSN(20,16);
6195 UInt opc = INSN(15,12);
6196 UInt sz = INSN(11,10);
6197 UInt nn = INSN(9,5);
6198 UInt tt = INSN(4,0);
6199 Bool isQ = bitQ == 1;
6200 UInt nRegs = 0;
6201 switch (opc) {
6202 case BITS4(0,0,1,0): nRegs = 4; break;
6203 case BITS4(0,1,1,0): nRegs = 3; break;
6204 case BITS4(1,0,1,0): nRegs = 2; break;
6205 default: break;
6208 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6209 If we see it, set nRegs to 0 so as to cause the next conditional
6210 to fail. */
6211 if (!isPX && mm != 0)
6212 nRegs = 0;
6214 if (nRegs >= 2 && nRegs <= 4) {
6216 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6218 /* Generate the transfer address (TA) and if necessary the
6219 writeback address (WB) */
6220 IRTemp tTA = newTemp(Ity_I64);
6221 assign(tTA, getIReg64orSP(nn));
6222 if (nn == 31) { /* FIXME generate stack alignment check */ }
6223 IRTemp tWB = IRTemp_INVALID;
6224 if (isPX) {
6225 tWB = newTemp(Ity_I64);
6226 assign(tWB, binop(Iop_Add64,
6227 mkexpr(tTA),
6228 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6229 : getIReg64orZR(mm)));
6232 /* -- BEGIN generate the transfers -- */
6234 IRTemp u0, u1, u2, u3;
6235 u0 = u1 = u2 = u3 = IRTemp_INVALID;
6236 switch (nRegs) {
6237 case 4: u3 = newTempV128(); /* fallthru */
6238 case 3: u2 = newTempV128(); /* fallthru */
6239 case 2: u1 = newTempV128();
6240 u0 = newTempV128(); break;
6241 default: vassert(0);
6244 /* -- Multiple 128 or 64 bit stores -- */
6245 if (!isLD) {
6246 switch (nRegs) {
6247 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6248 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6249 case 2: assign(u1, getQReg128((tt+1) % 32));
6250 assign(u0, getQReg128((tt+0) % 32)); break;
6251 default: vassert(0);
6253 # define MAYBE_NARROW_TO_64(_expr) \
6254 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6255 UInt step = isQ ? 16 : 8;
6256 switch (nRegs) {
6257 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6258 MAYBE_NARROW_TO_64(mkexpr(u3)) );
6259 /* fallthru */
6260 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6261 MAYBE_NARROW_TO_64(mkexpr(u2)) );
6262 /* fallthru */
6263 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6264 MAYBE_NARROW_TO_64(mkexpr(u1)) );
6265 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6266 MAYBE_NARROW_TO_64(mkexpr(u0)) );
6267 break;
6268 default: vassert(0);
6270 # undef MAYBE_NARROW_TO_64
6273 /* -- Multiple 128 or 64 bit loads -- */
6274 else /* isLD */ {
6275 UInt step = isQ ? 16 : 8;
6276 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6277 # define MAYBE_WIDEN_FROM_64(_expr) \
6278 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6279 switch (nRegs) {
6280 case 4:
6281 assign(u3, MAYBE_WIDEN_FROM_64(
6282 loadLE(loadTy,
6283 binop(Iop_Add64, mkexpr(tTA),
6284 mkU64(3 * step)))));
6285 /* fallthru */
6286 case 3:
6287 assign(u2, MAYBE_WIDEN_FROM_64(
6288 loadLE(loadTy,
6289 binop(Iop_Add64, mkexpr(tTA),
6290 mkU64(2 * step)))));
6291 /* fallthru */
6292 case 2:
6293 assign(u1, MAYBE_WIDEN_FROM_64(
6294 loadLE(loadTy,
6295 binop(Iop_Add64, mkexpr(tTA),
6296 mkU64(1 * step)))));
6297 assign(u0, MAYBE_WIDEN_FROM_64(
6298 loadLE(loadTy,
6299 binop(Iop_Add64, mkexpr(tTA),
6300 mkU64(0 * step)))));
6301 break;
6302 default:
6303 vassert(0);
6305 # undef MAYBE_WIDEN_FROM_64
6306 switch (nRegs) {
6307 case 4: putQReg128( (tt+3) % 32,
6308 math_MAYBE_ZERO_HI64(bitQ, u3));
6309 /* fallthru */
6310 case 3: putQReg128( (tt+2) % 32,
6311 math_MAYBE_ZERO_HI64(bitQ, u2));
6312 /* fallthru */
6313 case 2: putQReg128( (tt+1) % 32,
6314 math_MAYBE_ZERO_HI64(bitQ, u1));
6315 putQReg128( (tt+0) % 32,
6316 math_MAYBE_ZERO_HI64(bitQ, u0));
6317 break;
6318 default: vassert(0);
6322 /* -- END generate the transfers -- */
6324 /* Do the writeback, if necessary */
6325 if (isPX) {
6326 putIReg64orSP(nn, mkexpr(tWB));
6329 HChar pxStr[20];
6330 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6331 if (isPX) {
6332 if (mm == BITS5(1,1,1,1,1))
6333 vex_sprintf(pxStr, ", #%u", xferSzB);
6334 else
6335 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6337 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6338 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6339 isLD ? "ld" : "st",
6340 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6341 pxStr);
6343 return True;
6345 /* else fall through */
6348 /* ---------- LD1R (single structure, replicate) ---------- */
6349 /* ---------- LD2R (single structure, replicate) ---------- */
6350 /* ---------- LD3R (single structure, replicate) ---------- */
6351 /* ---------- LD4R (single structure, replicate) ---------- */
6352 /* 31 29 22 20 15 11 9 4
6353 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6354 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6356 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6357 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6359 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6360 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6362 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6363 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6365 step = if m == 11111 then transfer-size else Xm
6367 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6368 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6369 && INSN(12,12) == 0) {
6370 UInt bitQ = INSN(30,30);
6371 Bool isPX = INSN(23,23) == 1;
6372 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6373 UInt mm = INSN(20,16);
6374 UInt sz = INSN(11,10);
6375 UInt nn = INSN(9,5);
6376 UInt tt = INSN(4,0);
6378 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6379 if (isPX || mm == 0) {
6381 IRType ty = integerIRTypeOfSize(1 << sz);
6383 UInt laneSzB = 1 << sz;
6384 UInt xferSzB = laneSzB * nRegs;
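/* e.g. ld4r with 64-bit lanes (sz == 3) reads 4 * 8 == 32 bytes and replicates each loaded element across its whole destination register. */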
6386 /* Generate the transfer address (TA) and if necessary the
6387 writeback address (WB) */
6388 IRTemp tTA = newTemp(Ity_I64);
6389 assign(tTA, getIReg64orSP(nn));
6390 if (nn == 31) { /* FIXME generate stack alignment check */ }
6391 IRTemp tWB = IRTemp_INVALID;
6392 if (isPX) {
6393 tWB = newTemp(Ity_I64);
6394 assign(tWB, binop(Iop_Add64,
6395 mkexpr(tTA),
6396 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6397 : getIReg64orZR(mm)));
6400 /* Do the writeback, if necessary */
6401 if (isPX) {
6402 putIReg64orSP(nn, mkexpr(tWB));
6405 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6406 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6407 switch (nRegs) {
6408 case 4:
6409 e3 = newTemp(ty);
6410 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6411 mkU64(3 * laneSzB))));
6412 v3 = math_DUP_TO_V128(e3, ty);
6413 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6414 /* fallthrough */
6415 case 3:
6416 e2 = newTemp(ty);
6417 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6418 mkU64(2 * laneSzB))));
6419 v2 = math_DUP_TO_V128(e2, ty);
6420 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6421 /* fallthrough */
6422 case 2:
6423 e1 = newTemp(ty);
6424 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6425 mkU64(1 * laneSzB))));
6426 v1 = math_DUP_TO_V128(e1, ty);
6427 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6428 /* fallthrough */
6429 case 1:
6430 e0 = newTemp(ty);
6431 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6432 mkU64(0 * laneSzB))));
6433 v0 = math_DUP_TO_V128(e0, ty);
6434 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6435 break;
6436 default:
6437 vassert(0);
6440 HChar pxStr[20];
6441 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6442 if (isPX) {
6443 if (mm == BITS5(1,1,1,1,1))
6444 vex_sprintf(pxStr, ", #%u", xferSzB);
6445 else
6446 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6448 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6449 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6450 nRegs,
6451 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6452 pxStr);
6454 return True;
6456 /* else fall through */
6459 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6460 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6461 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6462 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6463 /* 31 29 22 21 20 15 11 9 4
6464 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6465 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6467 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6468 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6470 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6471 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6473 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6474 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6476 step = if m == 11111 then transfer-size else Xm
6477 op = case L of 1 -> LD ; 0 -> ST
6479 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6480 01:b:b:b0 -> 2, bbb
6481 10:b:b:00 -> 4, bb
6482 10:b:0:01 -> 8, b
6484 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6485 UInt bitQ = INSN(30,30);
6486 Bool isPX = INSN(23,23) == 1;
6487 Bool isLD = INSN(22,22) == 1;
6488 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6489 UInt mm = INSN(20,16);
6490 UInt xx = INSN(15,14);
6491 UInt bitS = INSN(12,12);
6492 UInt sz = INSN(11,10);
6493 UInt nn = INSN(9,5);
6494 UInt tt = INSN(4,0);
6496 Bool valid = True;
6498 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6499 if (!isPX && mm != 0)
6500 valid = False;
6502 UInt laneSzB = 0; /* invalid */
6503 UInt ix = 16; /* invalid */
6505 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
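/* Pack the fields so the table above can be matched with one switch; e.g. xx==10, q==1, S==0, sz==01 packs to 0x29, an 8-byte lane with index 1. */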
6506 switch (xx_q_S_sz) {
6507 case 0x00: case 0x01: case 0x02: case 0x03:
6508 case 0x04: case 0x05: case 0x06: case 0x07:
6509 case 0x08: case 0x09: case 0x0A: case 0x0B:
6510 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6511 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6512 break;
6513 case 0x10: case 0x12: case 0x14: case 0x16:
6514 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6515 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6516 break;
6517 case 0x20: case 0x24: case 0x28: case 0x2C:
6518 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6519 break;
6520 case 0x21: case 0x29:
6521 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6522 break;
6523 default:
6524 break;
6527 if (valid && laneSzB != 0) {
6529 IRType ty = integerIRTypeOfSize(laneSzB);
6530 UInt xferSzB = laneSzB * nRegs;
6532 /* Generate the transfer address (TA) and if necessary the
6533 writeback address (WB) */
6534 IRTemp tTA = newTemp(Ity_I64);
6535 assign(tTA, getIReg64orSP(nn));
6536 if (nn == 31) { /* FIXME generate stack alignment check */ }
6537 IRTemp tWB = IRTemp_INVALID;
6538 if (isPX) {
6539 tWB = newTemp(Ity_I64);
6540 assign(tWB, binop(Iop_Add64,
6541 mkexpr(tTA),
6542 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6543 : getIReg64orZR(mm)));
6546 /* Do the writeback, if necessary */
6547 if (isPX) {
6548 putIReg64orSP(nn, mkexpr(tWB));
6551 switch (nRegs) {
6552 case 4: {
6553 IRExpr* addr
6554 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6555 if (isLD) {
6556 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6557 } else {
6558 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6561 /* fallthrough */
6562 case 3: {
6563 IRExpr* addr
6564 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6565 if (isLD) {
6566 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6567 } else {
6568 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6571 /* fallthrough */
6572 case 2: {
6573 IRExpr* addr
6574 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6575 if (isLD) {
6576 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6577 } else {
6578 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6581 /* fallthrough */
6582 case 1: {
6583 IRExpr* addr
6584 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6585 if (isLD) {
6586 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6587 } else {
6588 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6590 break;
6592 default:
6593 vassert(0);
6596 HChar pxStr[20];
6597 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6598 if (isPX) {
6599 if (mm == BITS5(1,1,1,1,1))
6600 vex_sprintf(pxStr, ", #%u", xferSzB);
6601 else
6602 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6604 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6605 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6606 isLD ? "ld" : "st", nRegs,
6607 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6608 ix, nameIReg64orSP(nn), pxStr);
6610 return True;
6612 /* else fall through */
6615 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6616 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6617 /* 31 29 23 20 14 9 4
6618 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6619 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6620 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6621 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6623 /* For the "standard" implementation we pass through the LL and SC to
6624 the host. For the "fallback" implementation, for details see
6625 https://bugs.kde.org/show_bug.cgi?id=344524 and
6626 https://bugs.kde.org/show_bug.cgi?id=369459,
6627 but in short:
6629 LoadLinked(addr)
6630 gs.LLsize = load_size // 1, 2, 4 or 8
6631 gs.LLaddr = addr
6632 gs.LLdata = zeroExtend(*addr)
6634 StoreCond(addr, data)
6635 tmp_LLsize = gs.LLsize
6636 gs.LLsize = 0 // "no transaction"
6637 if tmp_LLsize != store_size -> fail
6638 if addr != gs.LLaddr -> fail
6639 if zeroExtend(*addr) != gs.LLdata -> fail
6640 cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6641 if !cas_ok -> fail
6642 succeed
6644 When thread scheduled
6645 gs.LLsize = 0 // "no transaction"
6646 (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
6647 has to do this bit)
6649 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6650 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6651 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6652 UInt szBlg2 = INSN(31,30);
6653 Bool isLD = INSN(22,22) == 1;
6654 Bool isAcqOrRel = INSN(15,15) == 1;
6655 UInt ss = INSN(20,16);
6656 UInt nn = INSN(9,5);
6657 UInt tt = INSN(4,0);
6659 vassert(szBlg2 < 4);
6660 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6661 IRType ty = integerIRTypeOfSize(szB);
6662 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6664 IRTemp ea = newTemp(Ity_I64);
6665 assign(ea, getIReg64orSP(nn));
6666 /* FIXME generate check that ea is szB-aligned */
6668 if (isLD && ss == BITS5(1,1,1,1,1)) {
6669 IRTemp res = newTemp(ty);
6670 if (abiinfo->guest__use_fallback_LLSC) {
6671 // Do the load first so we don't update any guest state
6672 // if it faults.
6673 IRTemp loaded_data64 = newTemp(Ity_I64);
6674 assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
6675 stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
6676 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6677 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
6678 putIReg64orZR(tt, mkexpr(loaded_data64));
6679 } else {
6680 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6681 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6683 if (isAcqOrRel) {
6684 stmt(IRStmt_MBE(Imbe_Fence));
6686 DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6687 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6688 abiinfo->guest__use_fallback_LLSC
6689 ? "(fallback implementation)" : "");
6690 return True;
6692 if (!isLD) {
6693 if (isAcqOrRel) {
6694 stmt(IRStmt_MBE(Imbe_Fence));
6696 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6697 if (abiinfo->guest__use_fallback_LLSC) {
6698 // This is really ugly, since we don't have any way to do
6699 // proper if-then-else. First, set up as if the SC failed,
6700 // and jump forwards if it really has failed.
6702 // Continuation address
6703 IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6705 // "the SC failed". Any non-zero value means failure.
6706 putIReg64orZR(ss, mkU64(1));
6708 IRTemp tmp_LLsize = newTemp(Ity_I64);
6709 assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6710 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6712 // Fail if no or wrong-size transaction
6713 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
6714 stmt( IRStmt_Exit(
6715 binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
6716 Ijk_Boring, nia, OFFB_PC
6718 // Fail if the address doesn't match the LL address
6719 stmt( IRStmt_Exit(
6720 binop(Iop_CmpNE64, mkexpr(ea),
6721 IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6722 Ijk_Boring, nia, OFFB_PC
6724 // Fail if the data doesn't match the LL data
6725 IRTemp llsc_data64 = newTemp(Ity_I64);
6726 assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
6727 stmt( IRStmt_Exit(
6728 binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
6729 mkexpr(llsc_data64)),
6730 Ijk_Boring, nia, OFFB_PC
6732 // Try to CAS the new value in.
6733 IRTemp old = newTemp(ty);
6734 IRTemp expd = newTemp(ty);
6735 assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
6736 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6737 Iend_LE, mkexpr(ea),
6738 /*expdHi*/NULL, mkexpr(expd),
6739 /*dataHi*/NULL, data
6740 )));
6741 // Fail if the CAS failed (viz, old != expd)
6742 stmt( IRStmt_Exit(
6743 binop(Iop_CmpNE64,
6744 widenUto64(ty, mkexpr(old)),
6745 widenUto64(ty, mkexpr(expd))),
6746 Ijk_Boring, nia, OFFB_PC
6748 // Otherwise we succeeded (!)
6749 putIReg64orZR(ss, mkU64(0));
6750 } else {
6751 IRTemp res = newTemp(Ity_I1);
6752 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6753 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6754 Need to set rS to 1 on failure, 0 on success. */
6755 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6756 mkU64(1)));
6758 DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6759 nameIRegOrZR(False, ss),
6760 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6761 abiinfo->guest__use_fallback_LLSC
6762 ? "(fallback implementation)" : "");
6763 return True;
6765 /* else fall through */
6768 /* ------------------ LDA{R,RH,RB} ------------------ */
6769 /* ------------------ STL{R,RH,RB} ------------------ */
6770 /* 31 29 23 20 14 9 4
6771 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6772 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6774 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6775 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6776 UInt szBlg2 = INSN(31,30);
6777 Bool isLD = INSN(22,22) == 1;
6778 UInt nn = INSN(9,5);
6779 UInt tt = INSN(4,0);
6781 vassert(szBlg2 < 4);
6782 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6783 IRType ty = integerIRTypeOfSize(szB);
6784 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6786 IRTemp ea = newTemp(Ity_I64);
6787 assign(ea, getIReg64orSP(nn));
6788 /* FIXME generate check that ea is szB-aligned */
6790 if (isLD) {
6791 IRTemp res = newTemp(ty);
6792 assign(res, loadLE(ty, mkexpr(ea)));
6793 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6794 stmt(IRStmt_MBE(Imbe_Fence));
6795 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6796 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6797 } else {
6798 stmt(IRStmt_MBE(Imbe_Fence));
6799 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6800 storeLE(mkexpr(ea), data);
6801 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6802 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6804 return True;
6807 /* The PRFM cases that follow possibly allow Rt values (the
6808 prefetch operation) which are not allowed by the documentation.
6809 This should be looked into. */
6810 /* ------------------ PRFM (immediate) ------------------ */
6811 /* 31 21 9 4
6812 11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
6814 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6815 UInt imm12 = INSN(21,10);
6816 UInt nn = INSN(9,5);
6817 UInt tt = INSN(4,0);
6818 /* Generating any IR here is pointless, except for documentation
6819 purposes, as it will get optimised away later. */
6820 IRTemp ea = newTemp(Ity_I64);
6821 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6822 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6823 return True;
6826 /* ------------------ PRFM (register) ------------------ */
6827 /* 31 29 22 20 15 12 11 9 4
6828 11 1110001 01 Rm opt S 10 Rn Rt PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6830 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6831 && INSN(11,10) == BITS2(1,0)) {
6832 HChar dis_buf[64];
6833 UInt tt = INSN(4,0);
6834 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6835 if (ea != IRTemp_INVALID) {
6836 /* No actual code to generate. */
6837 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6838 return True;
6842 /* ------------------ PRFM (unscaled offset) ------------------ */
6843 /* 31 29 22 20 11 9 4
6844 11 1110001 00 imm9 00 Rn Rt PRFM prfop=Rt, [Xn|SP, #simm]
6846 if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
6847 && INSN(11,10) == BITS2(0,0)) {
6848 ULong imm9 = INSN(20,12);
6849 UInt nn = INSN(9,5);
6850 UInt tt = INSN(4,0);
6851 ULong offset = sx_to_64(imm9, 9);
6852 IRTemp ea = newTemp(Ity_I64);
6853 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
6854 /* No actual code to generate. */
6855 DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
6856 return True;
6859 /* ---------------- ARMv8.1-LSE: Atomic Memory Operations ---------------- */
6860 /* 31 29 23 22 21 20 15 11 9 4
6861 sz 111000 A R 1 s 0000 00 n t LDADD{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6862 sz 111000 A R 1 s 0001 00 n t LDCLR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6863 sz 111000 A R 1 s 0010 00 n t LDEOR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6864 sz 111000 A R 1 s 0011 00 n t LDSET{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6865 sz 111000 A R 1 s 0100 00 n t LDSMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6866 sz 111000 A R 1 s 0101 00 n t LDSMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6867 sz 111000 A R 1 s 0110 00 n t LDUMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6868 sz 111000 A R 1 s 0111 00 n t LDUMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6869 sz 111000 A R 1 s 1000 00 n t SWP{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6871 if (INSN(29,24) == BITS6(1,1,1,0,0,0)
6872 && INSN(21,21) == 1
6873 && (INSN(15,12) <= BITS4(1,0,0,0))
6874 && INSN(11,10) == BITS2(0,0)) {
6875 UInt szBlg2 = INSN(31,30);
6876 Bool isAcq = INSN(23,23) == 1;
6877 Bool isRel = INSN(22,22) == 1;
6878 UInt ss = INSN(20,16);
6879 UInt opc = INSN(15,12);
6880 UInt nn = INSN(9,5);
6881 UInt tt = INSN(4,0);
6883 const HChar* nm = NULL;
6884 const HChar* suffix[4] = { "b", "h", "", "" };
6886 vassert(szBlg2 < 4);
6887 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 bytes*/
6888 IRType ty = integerIRTypeOfSize(szB);
6889 Bool is64 = szB == 8;
6890 Bool isSigned = (opc == 4) || (opc == 5) /*smax || smin*/;
6892 // IR used to emulate these atomic memory ops:
6893 // 1) barrier
6894 // 2) load
6895 // 3) widen operands and do arithmetic/logic op
6896 // 4) cas to see if target memory updated
6897 // 5) barrier
6898 // 6) repeat from 1) if cas says target memory not updated
6899 // 7) update register
6901 IRTemp ea = newTemp(Ity_I64);
6902 assign(ea, getIReg64orSP(nn));
6904 // Insert barrier before loading for acquire and acquire-release variants:
6905 // A and AL.
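// (With Rt == XZR these are the ST<op> aliases, which presumably is why the barrier is skipped in that case.)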
6906 if (isAcq && (tt != 31))
6907 stmt(IRStmt_MBE(Imbe_Fence));
6909 // Load LHS from memory, RHS from register.
6910 IRTemp orig = newTemp(ty);
6911 assign(orig, loadLE(ty, mkexpr(ea)));
6912 IRExpr *lhs = mkexpr(orig);
6913 IRExpr *rhs = narrowFrom64(ty, getIReg64orZR(ss));
6914 IRExpr *res = NULL;
6916 lhs = isSigned ? widenSto64(ty, lhs) : widenUto64(ty, lhs);
6917 rhs = isSigned ? widenSto64(ty, rhs) : widenUto64(ty, rhs);
6919 // Perform the operation.
6920 switch (opc) {
6921 case 0:
6922 nm = "ldadd";
6923 res = binop(Iop_Add64, lhs, rhs);
6924 break;
6925 case 1:
6926 nm = "ldclr";
6927 res = binop(Iop_And64, lhs, unop(mkNOT(Ity_I64), rhs));
6928 break;
6929 case 2:
6930 nm = "ldeor";
6931 res = binop(Iop_Xor64, lhs, rhs);
6932 break;
6933 case 3:
6934 nm = "ldset";
6935 res = binop(Iop_Or64, lhs, rhs);
6936 break;
6937 case 4:
6938 nm = "ldsmax";
6939 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), rhs, lhs);
6940 break;
6941 case 5:
6942 nm = "ldsmin";
6943 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), lhs, rhs);
6944 break;
6945 case 6:
6946 nm = "ldumax";
6947 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), rhs, lhs);
6948 break;
6949 case 7:
6950 nm = "ldumin";
6951 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), lhs, rhs);
6952 break;
6953 case 8:
6954 nm = "swp";
6955 res = rhs;
6956 break;
6957 default:
6958 vassert(0);
6959 break;
6962 // Store the result back if LHS remains unchanged in memory.
6963 IRTemp old = newTemp(ty);
6964 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6965 Iend_LE, mkexpr(ea),
6966 /*expdHi*/NULL, mkexpr(orig),
6967 /*dataHi*/NULL, narrowFrom64(ty, res))) );
6969 // Insert barrier after storing for release and acquire-release variants:
6970 // L and AL.
6971 if (isRel)
6972 stmt(IRStmt_MBE(Imbe_Fence));
6974 // Retry if the CAS failed (i.e. when old != orig).
6975 IRConst* nia = IRConst_U64(guest_PC_curr_instr);
6976 stmt( IRStmt_Exit(
6977 binop(Iop_CasCmpNE64,
6978 widenUto64(ty, mkexpr(old)),
6979 widenUto64(ty, mkexpr(orig))),
6980 Ijk_Boring, nia, OFFB_PC ));
6981 // Otherwise we succeeded.
6982 putIReg64orZR(tt, widenUto64(ty, mkexpr(old)));
6984 DIP("%s%s%s%s %s, %s, [%s]\n", nm, isAcq ? "a" : "", isRel ? "l" : "",
6985 suffix[szBlg2], nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt),
6986 nameIReg64orSP(nn));
6987 return True;
6990 /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */
6991 /* 31 29 22 21 20 15 14 9 4
6992 sz 0010001 A 1 s R 11111 n t CAS{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6994 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6995 && INSN(21,21) == 1
6996 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6997 UInt szBlg2 = INSN(31,30);
6998 Bool isAcq = INSN(22,22) == 1;
6999 Bool isRel = INSN(15,15) == 1;
7000 UInt ss = INSN(20,16);
7001 UInt nn = INSN(9,5);
7002 UInt tt = INSN(4,0);
7004 const HChar* suffix[4] = { "b", "h", "", "" };
7006 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
7007 IRType ty = integerIRTypeOfSize(szB);
7008 Bool is64 = szB == 8;
7010 IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
7011 IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
7013 if (isAcq)
7014 stmt(IRStmt_MBE(Imbe_Fence));
7016 // Store the result back if LHS remains unchanged in memory.
7017 IRTemp old = newTemp(ty);
7018 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
7019 Iend_LE, getIReg64orSP(nn),
7020 /*expdHi*/NULL, exp,
7021 /*dataHi*/NULL, new)) );
7023 if (isRel)
7024 stmt(IRStmt_MBE(Imbe_Fence));
7026 putIReg64orZR(ss, widenUto64(ty, mkexpr(old)));
7027 DIP("cas%s%s%s %s, %s, [%s]\n",
7028 isAcq ? "a" : "", isRel ? "l" : "", suffix[szBlg2],
7029 nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
7030 return True;
7033 /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
7034 /* 31 30 29 22 21 20 15 14 9 4
7035 0 sz 0010000 A 1 s R 11111 n t CASP{,A}{,L} <Rs>, <Rt>, [<Xn|SP>]
7037 if (INSN(31,31) == 0
7038 && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
7039 && INSN(21,21) == 1
7040 && INSN(14,10) == BITS5(1,1,1,1,1)) {
7041 UInt is64 = INSN(30,30);
7042 Bool isAcq = INSN(22,22) == 1;
7043 Bool isRel = INSN(15,15) == 1;
7044 UInt ss = INSN(20,16);
7045 UInt nn = INSN(9,5);
7046 UInt tt = INSN(4,0);
7048 if ((ss & 0x1) || (tt & 0x1)) {
7049 /* undefined; fall through */
7050 } else {
7051 IRExpr *expLo = getIRegOrZR(is64, ss);
7052 IRExpr *expHi = getIRegOrZR(is64, ss + 1);
7053 IRExpr *newLo = getIRegOrZR(is64, tt);
7054 IRExpr *newHi = getIRegOrZR(is64, tt + 1);
7055 IRTemp oldLo = newTemp(is64 ? Ity_I64 : Ity_I32);
7056 IRTemp oldHi = newTemp(is64 ? Ity_I64 : Ity_I32);
7058 if (isAcq)
7059 stmt(IRStmt_MBE(Imbe_Fence));
7061 stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
7062 Iend_LE, getIReg64orSP(nn),
7063 expHi, expLo,
7064 newHi, newLo)) );
7066 if (isRel)
7067 stmt(IRStmt_MBE(Imbe_Fence));
7069 putIRegOrZR(is64, ss, mkexpr(oldLo));
7070 putIRegOrZR(is64, ss+1, mkexpr(oldHi));
7071 DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
7072 isAcq ? "a" : "", isRel ? "l" : "",
7073 nameIRegOrZR(is64, ss), nameIRegOrZR(is64, ss+1),
7074 nameIRegOrZR(is64, tt), nameIRegOrZR(is64, tt+1),
7075 nameIReg64orSP(nn));
7076 return True;
7080 if (sigill_diag) {
7081 vex_printf("ARM64 front end: load_store\n");
7084 return False;
7085 # undef INSN
7089 /*------------------------------------------------------------*/
7090 /*--- Control flow and misc instructions ---*/
7091 /*------------------------------------------------------------*/
7093 static
7094 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
7095 const VexArchInfo* archinfo,
7096 const VexAbiInfo* abiinfo, Bool sigill_diag)
7098 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7100 /* ---------------------- B cond ----------------------- */
7101 /* 31 24 4 3
7102 0101010 0 imm19 0 cond */
7103 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
7104 UInt cond = INSN(3,0);
7105 ULong uimm64 = INSN(23,5) << 2;
7106 Long simm64 = (Long)sx_to_64(uimm64, 21);
7107 vassert(dres->whatNext == Dis_Continue);
7108 vassert(dres->len == 4);
7109 vassert(dres->jk_StopHere == Ijk_INVALID);
7110 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
7111 Ijk_Boring,
7112 IRConst_U64(guest_PC_curr_instr + simm64),
7113 OFFB_PC) );
7114 putPC(mkU64(guest_PC_curr_instr + 4));
7115 dres->whatNext = Dis_StopHere;
7116 dres->jk_StopHere = Ijk_Boring;
7117 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
7118 return True;
7121 /* -------------------- B{L} uncond -------------------- */
7122 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
7123 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
7124 100101 imm26 BL (PC + sxTo64(imm26 << 2))
7126 UInt bLink = INSN(31,31);
7127 ULong uimm64 = INSN(25,0) << 2;
7128 Long simm64 = (Long)sx_to_64(uimm64, 28);
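/* imm26 is a signed word offset, giving a branch range of +/- 128MB. */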
7129 if (bLink) {
7130 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
7132 putPC(mkU64(guest_PC_curr_instr + simm64));
7133 dres->whatNext = Dis_StopHere;
7134 dres->jk_StopHere = Ijk_Call;
7135 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
7136 guest_PC_curr_instr + simm64);
7137 return True;
7140 /* --------------------- B{L} reg --------------------- */
7141 /* 31 24 22 20 15 9 4
7142 1101011 00 10 11111 000000 nn 00000 RET Rn
7143 1101011 00 01 11111 000000 nn 00000 CALL Rn
7144 1101011 00 00 11111 000000 nn 00000 JMP Rn
7146 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
7147 && INSN(20,16) == BITS5(1,1,1,1,1)
7148 && INSN(15,10) == BITS6(0,0,0,0,0,0)
7149 && INSN(4,0) == BITS5(0,0,0,0,0)) {
7150 UInt branch_type = INSN(22,21);
7151 UInt nn = INSN(9,5);
7152 if (branch_type == BITS2(1,0) /* RET */) {
7153 putPC(getIReg64orZR(nn));
7154 dres->whatNext = Dis_StopHere;
7155 dres->jk_StopHere = Ijk_Ret;
7156 DIP("ret %s\n", nameIReg64orZR(nn));
7157 return True;
7159 if (branch_type == BITS2(0,1) /* CALL */) {
7160 IRTemp dst = newTemp(Ity_I64);
7161 assign(dst, getIReg64orZR(nn));
7162 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
7163 putPC(mkexpr(dst));
7164 dres->whatNext = Dis_StopHere;
7165 dres->jk_StopHere = Ijk_Call;
7166 DIP("blr %s\n", nameIReg64orZR(nn));
7167 return True;
7169 if (branch_type == BITS2(0,0) /* JMP */) {
7170 putPC(getIReg64orZR(nn));
7171 dres->whatNext = Dis_StopHere;
7172 dres->jk_StopHere = Ijk_Boring;
7173 DIP("jmp %s\n", nameIReg64orZR(nn));
7174 return True;
7178 /* -------------------- CB{N}Z -------------------- */
7179 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
7180 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
7182 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
7183 Bool is64 = INSN(31,31) == 1;
7184 Bool bIfZ = INSN(24,24) == 0;
7185 ULong uimm64 = INSN(23,5) << 2;
7186 UInt rT = INSN(4,0);
7187 Long simm64 = (Long)sx_to_64(uimm64, 21);
7188 IRExpr* cond = NULL;
7189 if (is64) {
7190 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
7191 getIReg64orZR(rT), mkU64(0));
7192 } else {
7193 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
7194 getIReg32orZR(rT), mkU32(0));
7196 stmt( IRStmt_Exit(cond,
7197 Ijk_Boring,
7198 IRConst_U64(guest_PC_curr_instr + simm64),
7199 OFFB_PC) );
7200 putPC(mkU64(guest_PC_curr_instr + 4));
7201 dres->whatNext = Dis_StopHere;
7202 dres->jk_StopHere = Ijk_Boring;
7203 DIP("cb%sz %s, 0x%llx\n",
7204 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
7205 guest_PC_curr_instr + simm64);
7206 return True;
7209 /* -------------------- TB{N}Z -------------------- */
7210 /* 31 30 24 23 18 5 4
7211 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
7212 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
7214 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
7215 UInt b5 = INSN(31,31);
7216 Bool bIfZ = INSN(24,24) == 0;
7217 UInt b40 = INSN(23,19);
7218 UInt imm14 = INSN(18,5);
7219 UInt tt = INSN(4,0);
7220 UInt bitNo = (b5 << 5) | b40;
7221 ULong uimm64 = imm14 << 2;
7222 Long simm64 = sx_to_64(uimm64, 16);
7223 IRExpr* cond
7224 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
7225 binop(Iop_And64,
7226 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
7227 mkU64(1)),
7228 mkU64(0));
7229 stmt( IRStmt_Exit(cond,
7230 Ijk_Boring,
7231 IRConst_U64(guest_PC_curr_instr + simm64),
7232 OFFB_PC) );
7233 putPC(mkU64(guest_PC_curr_instr + 4));
7234 dres->whatNext = Dis_StopHere;
7235 dres->jk_StopHere = Ijk_Boring;
7236 DIP("tb%sz %s, #%u, 0x%llx\n",
7237 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
7238 guest_PC_curr_instr + simm64);
7239 return True;
7242 /* -------------------- SVC -------------------- */
7243 /* 11010100 000 imm16 000 01
7244 Don't bother with anything except the imm16==0 case.
7246 if (INSN(31,0) == 0xD4000001) {
7247 putPC(mkU64(guest_PC_curr_instr + 4));
7248 dres->whatNext = Dis_StopHere;
7249 dres->jk_StopHere = Ijk_Sys_syscall;
7250 DIP("svc #0\n");
7251 return True;
7254 /* ------------------ M{SR,RS} ------------------ */
7255 /* ---- Cases for TPIDR_EL0 ----
7256 0xD51BD0 010 Rt MSR tpidr_el0, rT
7257 0xD53BD0 010 Rt MRS rT, tpidr_el0
7259 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
7260 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
7261 Bool toSys = INSN(21,21) == 0;
7262 UInt tt = INSN(4,0);
7263 if (toSys) {
7264 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
7265 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
7266 } else {
7267 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
7268 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
7270 return True;
7272 /* ---- Cases for FPCR ----
7273 0xD51B44 000 Rt MSR fpcr, rT
7274 0xD53B44 000 Rt MRS rT, fpcr
7276 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
7277 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
7278 Bool toSys = INSN(21,21) == 0;
7279 UInt tt = INSN(4,0);
7280 if (toSys) {
7281 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
7282 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
7283 } else {
7284 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
7285 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
7287 return True;
7289 /* ---- Cases for FPSR ----
7290 0xD51B44 001 Rt MSR fpsr, rT
7291 0xD53B44 001 Rt MRS rT, fpsr
7292 The only part of this we model is FPSR.QC. All other bits
7293 are ignored when writing to it and RAZ when reading from it.
7295 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
7296 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
7297 Bool toSys = INSN(21,21) == 0;
7298 UInt tt = INSN(4,0);
7299 if (toSys) {
7300 /* Just deal with FPSR.QC. Make up a V128 value which is
7301 zero if Xt[27] is zero and any other value if Xt[27] is
7302 nonzero. */
7303 IRTemp qc64 = newTemp(Ity_I64);
7304 assign(qc64, binop(Iop_And64,
7305 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
7306 mkU64(1)));
7307 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
7308 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
7309 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
7310 } else {
7311 /* Generate a value which is all zeroes except for bit 27,
7312 which must be zero if QCFLAG is all zeroes and one otherwise. */
7313 IRTemp qcV128 = newTempV128();
7314 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
7315 IRTemp qc64 = newTemp(Ity_I64);
7316 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
7317 unop(Iop_V128to64, mkexpr(qcV128))));
7318 IRExpr* res = binop(Iop_Shl64,
7319 unop(Iop_1Uto64,
7320 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
7321 mkU8(27));
7322 putIReg64orZR(tt, res);
7323 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
7325 return True;
7327 /* ---- Cases for NZCV ----
7328 D51B42 000 Rt MSR nzcv, rT
7329 D53B42 000 Rt MRS rT, nzcv
7330 The only parts of NZCV that actually exist are bits 31:28, which
7331 are the N Z C and V bits themselves. Hence the flags thunk provides
7332 all the state we need.
7334 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
7335 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
7336 Bool toSys = INSN(21,21) == 0;
7337 UInt tt = INSN(4,0);
7338 if (toSys) {
7339 IRTemp t = newTemp(Ity_I64);
7340 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
7341 setFlags_COPY(t);
7342 DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
7343 } else {
7344 IRTemp res = newTemp(Ity_I64);
7345 assign(res, mk_arm64g_calculate_flags_nzcv());
7346 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
7347 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
7349 return True;
7351 /* ---- Cases for DCZID_EL0 ----
7352 Don't support arbitrary reads and writes to this register. Just
7353 return the value 16, which indicates that the DC ZVA instruction
7354 is not permitted, so we don't have to emulate it.
7355 D5 3B 00 111 Rt MRS rT, dczid_el0
7357 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
7358 UInt tt = INSN(4,0);
7359 putIReg64orZR(tt, mkU64(1<<4));
7360 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
7361 return True;
7363 /* ---- Cases for CTR_EL0 ----
7364 We just handle reads, and make up a value from the D and I line
7365 sizes in the VexArchInfo we are given, and patch in the following
7366 fields that the Foundation model gives ("natively"):
7367 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
7368 D5 3B 00 001 Rt MRS rT, ctr_el0
7370 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
7371 UInt tt = INSN(4,0);
7372 /* Need to generate a value from dMinLine_lg2_szB and
7373 iMinLine_lg2_szB. The value in the register is in 32-bit
7374 units, so need to subtract 2 from the values in the
7375 VexArchInfo. We can assume that the values here are valid --
7376 disInstr_ARM64 checks them -- so there's no need to deal with
7377 out-of-range cases. */
7378 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7379 && archinfo->arm64_dMinLine_lg2_szB <= 17
7380 && archinfo->arm64_iMinLine_lg2_szB >= 2
7381 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7382 UInt val
7383 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
7384 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
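      /* Worked example (illustrative only): with 64-byte D- and I-cache
         lines, both *MinLine_lg2_szB values are 6, so both 4-bit
         line-size fields become 6 - 2 = 4 (the register counts 4-byte
         words), and val == 0x8444c004. */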
7385 putIReg64orZR(tt, mkU64(val));
7386 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
7387 return True;
7389 /* ---- Cases for CNTVCT_EL0 ----
7390 This is the generic timer's virtual count register. Support reads of it only
7391 by passing through to the host.
7392 D5 3B E0 010 Rt MRS Xt, cntvct_el0
7394 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
7395 UInt tt = INSN(4,0);
7396 IRTemp val = newTemp(Ity_I64);
7397 IRExpr** args = mkIRExprVec_0();
7398 IRDirty* d = unsafeIRDirty_1_N (
7399 val,
7400 0/*regparms*/,
7401 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
7402 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
7403 args
7405 /* execute the dirty call, dumping the result in val. */
7406 stmt( IRStmt_Dirty(d) );
7407 putIReg64orZR(tt, mkexpr(val));
7408 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
7409 return True;
7411 /* ---- Cases for CNTFRQ_EL0 ----
7412 This is always RO at EL0, so it's safe to pass through to the host.
7413 D5 3B E0 000 Rt MRS Xt, cntfrq_el0
7415 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
7416 UInt tt = INSN(4,0);
7417 IRTemp val = newTemp(Ity_I64);
7418 IRExpr** args = mkIRExprVec_0();
7419 IRDirty* d = unsafeIRDirty_1_N (
7420 val,
7421 0/*regparms*/,
7422 "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
7423 &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
7424 args
7426 /* execute the dirty call, dumping the result in val. */
7427 stmt( IRStmt_Dirty(d) );
7428 putIReg64orZR(tt, mkexpr(val));
7429 DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
7430 return True;
7433 /* ------------------ IC_IVAU ------------------ */
7434 /* D5 0B 75 001 Rt ic ivau, rT
7436 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
7437 /* We will always be provided with a valid iMinLine value. */
7438 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
7439 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7440 /* Round the requested address, in rT, down to the start of the
7441 containing block. */
7442 UInt tt = INSN(4,0);
7443 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
7444 IRTemp addr = newTemp(Ity_I64);
7445 assign( addr, binop( Iop_And64,
7446 getIReg64orZR(tt),
7447 mkU64(~(lineszB - 1))) );
7448 /* Set the invalidation range, request exit-and-invalidate, with
7449 continuation at the next instruction. */
7450 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7451 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7452 /* be paranoid ... */
7453 stmt( IRStmt_MBE(Imbe_Fence) );
7454 putPC(mkU64( guest_PC_curr_instr + 4 ));
7455 dres->whatNext = Dis_StopHere;
7456 dres->jk_StopHere = Ijk_InvalICache;
7457 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
7458 return True;
7461 /* ------------------ DC_CVAU ------------------ */
7462 /* D5 0B 7B 001 Rt dc cvau, rT
7463 D5 0B 7E 001 Rt dc civac, rT
7465 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20
7466 || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7E20) {
7467 /* Exactly the same scheme as for IC IVAU, except we observe the
7468 dMinLine size, and request an Ijk_FlushDCache instead of
7469 Ijk_InvalICache. */
7470 /* We will always be provided with a valid dMinLine value. */
7471 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7472 && archinfo->arm64_dMinLine_lg2_szB <= 17);
7473 /* Round the requested address, in rT, down to the start of the
7474 containing block. */
7475 UInt tt = INSN(4,0);
7476 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
7477 IRTemp addr = newTemp(Ity_I64);
7478 assign( addr, binop( Iop_And64,
7479 getIReg64orZR(tt),
7480 mkU64(~(lineszB - 1))) );
7481 /* Set the flush range, request exit-and-flush, with
7482 continuation at the next instruction. */
7483 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7484 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7485 /* be paranoid ... */
7486 stmt( IRStmt_MBE(Imbe_Fence) );
7487 putPC(mkU64( guest_PC_curr_instr + 4 ));
7488 dres->whatNext = Dis_StopHere;
7489 dres->jk_StopHere = Ijk_FlushDCache;
7490 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
7491 return True;
7494 /* ------------------ ISB, DMB, DSB ------------------ */
7495 /* 31 21 11 7 6 4
7496 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
7497 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
7498 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
7500 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
7501 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
7502 && INSN(7,7) == 1
7503 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
7504 UInt opc = INSN(6,5);
7505 UInt CRm = INSN(11,8);
7506 vassert(opc <= 2 && CRm <= 15);
7507 stmt(IRStmt_MBE(Imbe_Fence));
7508 const HChar* opNames[3]
7509 = { "dsb", "dmb", "isb" };
7510 const HChar* howNames[16]
7511 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
7512 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
7513 DIP("%s %s\n", opNames[opc], howNames[CRm]);
7514 return True;
7517 /* -------------------- NOP -------------------- */
7518 if (INSN(31,0) == 0xD503201F) {
7519 DIP("nop\n");
7520 return True;
7523 /* -------------------- BRK -------------------- */
7524 /* 31 23 20 4
7525 1101 0100 001 imm16 00000 BRK #imm16
7527 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
7528 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
7529 UInt imm16 = INSN(20,5);
7530 /* Request SIGTRAP and then restart of this insn. */
7531 putPC(mkU64(guest_PC_curr_instr + 0));
7532 dres->whatNext = Dis_StopHere;
7533 dres->jk_StopHere = Ijk_SigTRAP;
7534 DIP("brk #%u\n", imm16);
7535 return True;
7538 /* ------------------- YIELD ------------------- */
7539 /* 31 23 15 7
7540 1101 0101 0000 0011 0010 0000 0011 1111
7542 if (INSN(31,0) == 0xD503203F) {
7543 /* Request yield followed by continuation at the next insn. */
7544 putPC(mkU64(guest_PC_curr_instr + 4));
7545 dres->whatNext = Dis_StopHere;
7546 dres->jk_StopHere = Ijk_Yield;
7547 DIP("yield\n");
7548 return True;
7551 /* -------------------- HINT ------------------- */
7552 /* 31 23 15 11 4 3
7553 1101 0101 0000 0011 0010 imm7 1 1111
7554 Catch otherwise unhandled HINT instructions - any
7555 like YIELD which are explicitly handled should go
7556 above this case.
7558 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
7559 && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
7560 && INSN(15,12) == BITS4(0,0,1,0)
7561 && INSN(4,0) == BITS5(1,1,1,1,1)) {
7562 UInt imm7 = INSN(11,5);
7563 DIP("hint #%u\n", imm7);
7564 return True;
7567 /* ------------------- CLREX ------------------ */
7568 /* 31 23 15 11 7
7569 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
7570 CRm is apparently ignored.
7572 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
7573 UInt mm = INSN(11,8);
7574 /* AFAICS, this simply cancels a (all?) reservations made by a
7575 (any?) preceding LDREX(es). Arrange to hand it through to
7576 the back end. */
7577 if (abiinfo->guest__use_fallback_LLSC) {
7578 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
7579 } else {
7580 stmt( IRStmt_MBE(Imbe_CancelReservation) );
7582 DIP("clrex #%u\n", mm);
7583 return True;
7586 if (sigill_diag) {
7587 vex_printf("ARM64 front end: branch_etc\n");
7589 return False;
7590 # undef INSN
7594 /*------------------------------------------------------------*/
7595 /*--- SIMD and FP instructions: helper functions ---*/
7596 /*------------------------------------------------------------*/
7598 /* Some constructors for interleave/deinterleave expressions. */
7600 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7601 // returns a0 b0
7602 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
7605 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7606 // returns a1 b1
7607 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
7610 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7611 // returns a2 a0 b2 b0
7612 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
7615 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7616 // returns a3 a1 b3 b1
7617 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
7620 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
7621 // returns a1 b1 a0 b0
7622 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
7625 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
7626 // returns a3 b3 a2 b2
7627 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
7630 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7631 // returns a6 a4 a2 a0 b6 b4 b2 b0
7632 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7635 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7636 // returns a7 a5 a3 a1 b7 b5 b3 b1
7637 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7640 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7641 // returns a3 b3 a2 b2 a1 b1 a0 b0
7642 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
7645 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7646 // returns a7 b7 a6 b6 a5 b5 a4 b4
7647 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
7650 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7651 IRTemp bFEDCBA9876543210 ) {
7652 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7653 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
7654 mkexpr(bFEDCBA9876543210));
7657 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7658 IRTemp bFEDCBA9876543210 ) {
7659 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7660 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
7661 mkexpr(bFEDCBA9876543210));
7664 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7665 IRTemp bFEDCBA9876543210 ) {
7666 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7667 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
7668 mkexpr(bFEDCBA9876543210));
7671 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7672 IRTemp bFEDCBA9876543210 ) {
7673 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7674 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
7675 mkexpr(bFEDCBA9876543210));
7678 /* Generate N copies of |bit| in the bottom of a ULong. */
7679 static ULong Replicate ( ULong bit, Int N )
7681 vassert(bit <= 1 && N >= 1 && N < 64);
7682 if (bit == 0) {
7683 return 0;
7684 } else {
7685 /* Careful. This won't work for N == 64. */
7686 return (1ULL << N) - 1;
7690 static ULong Replicate32x2 ( ULong bits32 )
7692 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
7693 return (bits32 << 32) | bits32;
7696 static ULong Replicate16x4 ( ULong bits16 )
7698 vassert(0 == (bits16 & ~0xFFFFULL));
7699 return Replicate32x2((bits16 << 16) | bits16);
7702 static ULong Replicate8x8 ( ULong bits8 )
7704 vassert(0 == (bits8 & ~0xFFULL));
7705 return Replicate16x4((bits8 << 8) | bits8);
7708 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
7709 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
7710 is 64. In the former case, the upper 32 bits of the returned value
7711 are guaranteed to be zero. */
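/* Worked example (illustrative, not part of the ARM pseudocode): for
   imm8 == 0x70 we have sign == 0, imm8<6> == 1 and imm8<5:0> == 0x30, so
   VFPExpandImm(0x70, 32) == 0x3F800000 (1.0f) and
   VFPExpandImm(0x70, 64) == 0x3FF0000000000000 (1.0). */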
7712 static ULong VFPExpandImm ( ULong imm8, Int N )
7714 vassert(imm8 <= 0xFF);
7715 vassert(N == 32 || N == 64);
7716 Int E = ((N == 32) ? 8 : 11) - 2; // -2: imm8<5:4> are treated as fraction bits below, unlike the ARM pseudocode.
7717 Int F = N - E - 1;
7718 ULong imm8_6 = (imm8 >> 6) & 1;
7719 /* sign: 1 bit */
7720 /* exp: E bits */
7721 /* frac: F bits */
7722 ULong sign = (imm8 >> 7) & 1;
7723 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
7724 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
7725 vassert(sign < (1ULL << 1));
7726 vassert(exp < (1ULL << E));
7727 vassert(frac < (1ULL << F));
7728 vassert(1 + E + F == N);
7729 ULong res = (sign << (E+F)) | (exp << F) | frac;
7730 return res;
7733 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
7734 This might fail, as indicated by the returned Bool. Page 2530 of
7735 the manual. */
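/* Worked example (illustrative only): op == 0, cmode == 0b0010,
   imm8 == 0xAB falls into the "cmode >> 1 == 1" case, giving
   imm64 == Replicate32x2(0xAB00) == 0x0000AB000000AB00 and a True
   return; the same cmode with imm8 == 0 fails the testimm8 check
   below and returns False. */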
7736 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
7737 UInt op, UInt cmode, UInt imm8 )
7739 vassert(op <= 1);
7740 vassert(cmode <= 15);
7741 vassert(imm8 <= 255);
7743 *res = 0; /* will overwrite iff returning True */
7745 ULong imm64 = 0;
7746 Bool testimm8 = False;
7748 switch (cmode >> 1) {
7749 case 0:
7750 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7751 case 1:
7752 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7753 case 2:
7754 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7755 case 3:
7756 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7757 case 4:
7758 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7759 case 5:
7760 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7761 case 6:
7762 testimm8 = True;
7763 if ((cmode & 1) == 0)
7764 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7765 else
7766 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7767 break;
7768 case 7:
7769 testimm8 = False;
7770 if ((cmode & 1) == 0 && op == 0)
7771 imm64 = Replicate8x8(imm8);
7772 if ((cmode & 1) == 0 && op == 1) {
7773 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7774 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7775 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7776 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7777 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7778 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7779 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7780 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7782 if ((cmode & 1) == 1 && op == 0) {
7783 ULong imm8_7 = (imm8 >> 7) & 1;
7784 ULong imm8_6 = (imm8 >> 6) & 1;
7785 ULong imm8_50 = imm8 & 63;
7786 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7787 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7788 | (Replicate(imm8_6, 5) << (6 + 19))
7789 | (imm8_50 << 19);
7790 imm64 = Replicate32x2(imm32);
7792 if ((cmode & 1) == 1 && op == 1) {
7793 // imm64 = imm8<7>:NOT(imm8<6>)
7794 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7795 ULong imm8_7 = (imm8 >> 7) & 1;
7796 ULong imm8_6 = (imm8 >> 6) & 1;
7797 ULong imm8_50 = imm8 & 63;
7798 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7799 | (Replicate(imm8_6, 8) << 54)
7800 | (imm8_50 << 48);
7802 break;
7803 default:
7804 vassert(0);
7807 if (testimm8 && imm8 == 0)
7808 return False;
7810 *res = imm64;
7811 return True;
7814 /* Help a bit for decoding laneage for vector operations that can be
7815 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7816 and SZ bits, typically for vector floating point. */
7817 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7818 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7819 /*OUT*/const HChar** arrSpec,
7820 Bool bitQ, Bool bitSZ )
7822 vassert(bitQ == True || bitQ == False);
7823 vassert(bitSZ == True || bitSZ == False);
7824 if (bitQ && bitSZ) { // 2x64
7825 if (tyI) *tyI = Ity_I64;
7826 if (tyF) *tyF = Ity_F64;
7827 if (nLanes) *nLanes = 2;
7828 if (zeroUpper) *zeroUpper = False;
7829 if (arrSpec) *arrSpec = "2d";
7830 return True;
7832 if (bitQ && !bitSZ) { // 4x32
7833 if (tyI) *tyI = Ity_I32;
7834 if (tyF) *tyF = Ity_F32;
7835 if (nLanes) *nLanes = 4;
7836 if (zeroUpper) *zeroUpper = False;
7837 if (arrSpec) *arrSpec = "4s";
7838 return True;
7840 if (!bitQ && !bitSZ) { // 2x32
7841 if (tyI) *tyI = Ity_I32;
7842 if (tyF) *tyF = Ity_F32;
7843 if (nLanes) *nLanes = 2;
7844 if (zeroUpper) *zeroUpper = True;
7845 if (arrSpec) *arrSpec = "2s";
7846 return True;
7848 // Else impliedly 1x64, which isn't allowed.
7849 return False;
7852 /* Helper for decoding laneage for shift-style vector operations
7853 that involve an immediate shift amount. */
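/* Worked example (illustrative only): immh == 0b0010, immb == 0b011
   gives immhb == 19; the 0b001x pattern selects 16-bit lanes
   (szBlg2 == 1) and the shift amount is 32 - 19 == 13. */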
7854 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7855 UInt immh, UInt immb )
7857 vassert(immh < (1<<4));
7858 vassert(immb < (1<<3));
7859 UInt immhb = (immh << 3) | immb;
7860 if (immh & 8) {
7861 if (shift) *shift = 128 - immhb;
7862 if (szBlg2) *szBlg2 = 3;
7863 return True;
7865 if (immh & 4) {
7866 if (shift) *shift = 64 - immhb;
7867 if (szBlg2) *szBlg2 = 2;
7868 return True;
7870 if (immh & 2) {
7871 if (shift) *shift = 32 - immhb;
7872 if (szBlg2) *szBlg2 = 1;
7873 return True;
7875 if (immh & 1) {
7876 if (shift) *shift = 16 - immhb;
7877 if (szBlg2) *szBlg2 = 0;
7878 return True;
7880 return False;
7883 /* Generate IR to fold all lanes of the V128 value in 'src' as
7884 characterised by the operator 'op', and return the result in the
7885 bottom bits of a V128, with all other bits set to zero. */
7886 static IRTemp math_FOLDV ( IRTemp src, IROp op )
7888 /* The basic idea is to use repeated applications of Iop_CatEven*
7889 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7890 a complete vector. Then fold all those vectors with 'op' and
7891 zero out all but the least significant lane. */
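   /* For instance, for Iop_Add32x4 on lanes [x3 x2 x1 x0] this computes
      (x3+x2) and (x1+x0) in every lane of two intermediate vectors, adds
      those, and then zeroes the upper 96 bits, leaving x3+x2+x1+x0 in
      lane 0 only. */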
7892 switch (op) {
7893 case Iop_Min8Sx16: case Iop_Min8Ux16:
7894 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
7895 /* NB: temp naming here is misleading -- the naming is for 8
7896 lanes of 16 bit, whereas what is being operated on is 16
7897 lanes of 8 bits. */
7898 IRTemp x76543210 = src;
7899 IRTemp x76547654 = newTempV128();
7900 IRTemp x32103210 = newTempV128();
7901 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7902 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7903 IRTemp x76767676 = newTempV128();
7904 IRTemp x54545454 = newTempV128();
7905 IRTemp x32323232 = newTempV128();
7906 IRTemp x10101010 = newTempV128();
7907 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7908 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7909 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7910 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7911 IRTemp x77777777 = newTempV128();
7912 IRTemp x66666666 = newTempV128();
7913 IRTemp x55555555 = newTempV128();
7914 IRTemp x44444444 = newTempV128();
7915 IRTemp x33333333 = newTempV128();
7916 IRTemp x22222222 = newTempV128();
7917 IRTemp x11111111 = newTempV128();
7918 IRTemp x00000000 = newTempV128();
7919 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7920 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7921 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7922 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7923 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7924 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7925 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7926 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7927 /* Naming not misleading after here. */
7928 IRTemp xAllF = newTempV128();
7929 IRTemp xAllE = newTempV128();
7930 IRTemp xAllD = newTempV128();
7931 IRTemp xAllC = newTempV128();
7932 IRTemp xAllB = newTempV128();
7933 IRTemp xAllA = newTempV128();
7934 IRTemp xAll9 = newTempV128();
7935 IRTemp xAll8 = newTempV128();
7936 IRTemp xAll7 = newTempV128();
7937 IRTemp xAll6 = newTempV128();
7938 IRTemp xAll5 = newTempV128();
7939 IRTemp xAll4 = newTempV128();
7940 IRTemp xAll3 = newTempV128();
7941 IRTemp xAll2 = newTempV128();
7942 IRTemp xAll1 = newTempV128();
7943 IRTemp xAll0 = newTempV128();
7944 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7945 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7946 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7947 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7948 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7949 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7950 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7951 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7952 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7953 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7954 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7955 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7956 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7957 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7958 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7959 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
7960 IRTemp maxFE = newTempV128();
7961 IRTemp maxDC = newTempV128();
7962 IRTemp maxBA = newTempV128();
7963 IRTemp max98 = newTempV128();
7964 IRTemp max76 = newTempV128();
7965 IRTemp max54 = newTempV128();
7966 IRTemp max32 = newTempV128();
7967 IRTemp max10 = newTempV128();
7968 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7969 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7970 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7971 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7972 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7973 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7974 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7975 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
7976 IRTemp maxFEDC = newTempV128();
7977 IRTemp maxBA98 = newTempV128();
7978 IRTemp max7654 = newTempV128();
7979 IRTemp max3210 = newTempV128();
7980 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7981 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7982 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7983 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7984 IRTemp maxFEDCBA98 = newTempV128();
7985 IRTemp max76543210 = newTempV128();
7986 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7987 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7988 IRTemp maxAllLanes = newTempV128();
7989 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7990 mkexpr(max76543210)));
7991 IRTemp res = newTempV128();
7992 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7993 return res;
7995 case Iop_Min16Sx8: case Iop_Min16Ux8:
7996 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
7997 IRTemp x76543210 = src;
7998 IRTemp x76547654 = newTempV128();
7999 IRTemp x32103210 = newTempV128();
8000 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
8001 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
8002 IRTemp x76767676 = newTempV128();
8003 IRTemp x54545454 = newTempV128();
8004 IRTemp x32323232 = newTempV128();
8005 IRTemp x10101010 = newTempV128();
8006 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
8007 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
8008 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
8009 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
8010 IRTemp x77777777 = newTempV128();
8011 IRTemp x66666666 = newTempV128();
8012 IRTemp x55555555 = newTempV128();
8013 IRTemp x44444444 = newTempV128();
8014 IRTemp x33333333 = newTempV128();
8015 IRTemp x22222222 = newTempV128();
8016 IRTemp x11111111 = newTempV128();
8017 IRTemp x00000000 = newTempV128();
8018 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
8019 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
8020 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
8021 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
8022 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
8023 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
8024 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
8025 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
8026 IRTemp max76 = newTempV128();
8027 IRTemp max54 = newTempV128();
8028 IRTemp max32 = newTempV128();
8029 IRTemp max10 = newTempV128();
8030 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
8031 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
8032 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
8033 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
8034 IRTemp max7654 = newTempV128();
8035 IRTemp max3210 = newTempV128();
8036 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
8037 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
8038 IRTemp max76543210 = newTempV128();
8039 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
8040 IRTemp res = newTempV128();
8041 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
8042 return res;
8044 case Iop_Max32Fx4: case Iop_Min32Fx4:
8045 case Iop_Min32Sx4: case Iop_Min32Ux4:
8046 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
8047 IRTemp x3210 = src;
8048 IRTemp x3232 = newTempV128();
8049 IRTemp x1010 = newTempV128();
8050 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
8051 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
8052 IRTemp x3333 = newTempV128();
8053 IRTemp x2222 = newTempV128();
8054 IRTemp x1111 = newTempV128();
8055 IRTemp x0000 = newTempV128();
8056 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
8057 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
8058 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
8059 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
8060 IRTemp max32 = newTempV128();
8061 IRTemp max10 = newTempV128();
8062 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
8063 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
8064 IRTemp max3210 = newTempV128();
8065 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
8066 IRTemp res = newTempV128();
8067 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
8068 return res;
8070 case Iop_Add64x2: {
8071 IRTemp x10 = src;
8072 IRTemp x00 = newTempV128();
8073 IRTemp x11 = newTempV128();
8074 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
8075 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
8076 IRTemp max10 = newTempV128();
8077 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
8078 IRTemp res = newTempV128();
8079 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
8080 return res;
8082 default:
8083 vassert(0);
8088 /* Generate IR for TBL and TBX. This deals with the 128 bit case
8089 only. */
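/* Sketch of the semantics implemented below (for orientation only): with
   len == 0 there is a single table register, so a source index byte of 3
   selects byte 3 of tab[0], while any index byte >= 16 is out of range
   and contributes zero to the running result; such lanes are later
   replaced by the corresponding byte of |oor_values| (zero for TBL, the
   old destination for TBX, as supplied by the caller). */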
8090 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
8091 IRTemp oor_values )
8093 vassert(len >= 0 && len <= 3);
8095 /* Generate some useful constants as concisely as possible. */
8096 IRTemp half15 = newTemp(Ity_I64);
8097 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
8098 IRTemp half16 = newTemp(Ity_I64);
8099 assign(half16, mkU64(0x1010101010101010ULL));
8101 /* A zero vector */
8102 IRTemp allZero = newTempV128();
8103 assign(allZero, mkV128(0x0000));
8104 /* A vector containing 15 in each 8-bit lane */
8105 IRTemp all15 = newTempV128();
8106 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
8107 /* A vector containing 16 in each 8-bit lane */
8108 IRTemp all16 = newTempV128();
8109 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
8110 /* A vector containing 32 in each 8-bit lane */
8111 IRTemp all32 = newTempV128();
8112 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
8113 /* A vector containing 48 in each 8-bit lane */
8114 IRTemp all48 = newTempV128();
8115 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
8116 /* A vector containing 64 in each 8-bit lane */
8117 IRTemp all64 = newTempV128();
8118 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
8120 /* Group the 16/32/48/64 vectors so as to be indexable. */
8121 IRTemp allXX[4] = { all16, all32, all48, all64 };
8123 /* Compute the result for each table vector, with zeroes in places
8124 where the index values are out of range, and OR them into the
8125 running vector. */
8126 IRTemp running_result = newTempV128();
8127 assign(running_result, mkV128(0));
8129 UInt tabent;
8130 for (tabent = 0; tabent <= len; tabent++) {
8131 vassert(tabent >= 0 && tabent < 4);
8132 IRTemp bias = newTempV128();
8133 assign(bias,
8134 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
8135 IRTemp biased_indices = newTempV128();
8136 assign(biased_indices,
8137 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
8138 IRTemp valid_mask = newTempV128();
8139 assign(valid_mask,
8140 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
8141 IRTemp safe_biased_indices = newTempV128();
8142 assign(safe_biased_indices,
8143 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
8144 IRTemp results_or_junk = newTempV128();
8145 assign(results_or_junk,
8146 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
8147 mkexpr(safe_biased_indices)));
8148 IRTemp results_or_zero = newTempV128();
8149 assign(results_or_zero,
8150 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
8151 /* And OR that into the running result. */
8152 IRTemp tmp = newTempV128();
8153 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
8154 mkexpr(running_result)));
8155 running_result = tmp;
8158 /* So now running_result holds the overall result where the indices
8159 are in range, and zero in out-of-range lanes. Now we need to
8160 compute an overall validity mask and use this to copy in the
8161 lanes in the oor_values for out of range indices. This is
8162 unnecessary for TBL but will get folded out by iropt, so we lean
8163 on that and generate the same code for TBL and TBX here. */
8164 IRTemp overall_valid_mask = newTempV128();
8165 assign(overall_valid_mask,
8166 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
8167 IRTemp result = newTempV128();
8168 assign(result,
8169 binop(Iop_OrV128,
8170 mkexpr(running_result),
8171 binop(Iop_AndV128,
8172 mkexpr(oor_values),
8173 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
8174 return result;
8178 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
8179 an op which takes two I64s and produces a V128. That is, a widening
8180 operator. Generate IR which applies |opI64x2toV128| to either the
8181 lower (if |is2| is False) or upper (if |is2| is True) halves of
8182 |argL| and |argR|, and return the value in a new IRTemp.
8184 static
8185 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
8186 IRExpr* argL, IRExpr* argR )
8188 IRTemp res = newTempV128();
8189 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
8190 assign(res, binop(opI64x2toV128, unop(slice, argL),
8191 unop(slice, argR)));
8192 return res;
8196 /* Generate signed/unsigned absolute difference vector IR. */
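/* The mask-and-merge below computes |argL - argR| per lane; e.g. for
   signed 8-bit lanes holding 5 and 9 the compare mask is zero, so the
   lane result is (9 - 5) & 0xFF == 4, and with the operands swapped the
   mask is all-ones and the result is again 4. */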
8197 static
8198 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
8200 vassert(size <= 3);
8201 IRTemp argL = newTempV128();
8202 IRTemp argR = newTempV128();
8203 IRTemp msk = newTempV128();
8204 IRTemp res = newTempV128();
8205 assign(argL, argLE);
8206 assign(argR, argRE);
8207 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
8208 mkexpr(argL), mkexpr(argR)));
8209 assign(res,
8210 binop(Iop_OrV128,
8211 binop(Iop_AndV128,
8212 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
8213 mkexpr(msk)),
8214 binop(Iop_AndV128,
8215 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
8216 unop(Iop_NotV128, mkexpr(msk)))));
8217 return res;
8221 /* Generate IR that takes a V128 and sign- or zero-widens
8222 either the lower or upper set of lanes to twice-as-wide,
8223 resulting in a new V128 value. */
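/* For example (X01, lower half, sign-widening): a lower-half 16-bit lane
   holding 0x8000 is first interleaved with itself to form the 32-bit
   value 0x80008000, and the arithmetic right shift by 16 then yields
   0xFFFF8000; with zero-widening the logical shift gives 0x00008000. */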
8224 static
8225 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
8226 UInt sizeNarrow, IRExpr* srcE )
8228 IRTemp src = newTempV128();
8229 IRTemp res = newTempV128();
8230 assign(src, srcE);
8231 switch (sizeNarrow) {
8232 case X10:
8233 assign(res,
8234 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
8235 binop(fromUpperHalf ? Iop_InterleaveHI32x4
8236 : Iop_InterleaveLO32x4,
8237 mkexpr(src),
8238 mkexpr(src)),
8239 mkU8(32)));
8240 break;
8241 case X01:
8242 assign(res,
8243 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
8244 binop(fromUpperHalf ? Iop_InterleaveHI16x8
8245 : Iop_InterleaveLO16x8,
8246 mkexpr(src),
8247 mkexpr(src)),
8248 mkU8(16)));
8249 break;
8250 case X00:
8251 assign(res,
8252 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
8253 binop(fromUpperHalf ? Iop_InterleaveHI8x16
8254 : Iop_InterleaveLO8x16,
8255 mkexpr(src),
8256 mkexpr(src)),
8257 mkU8(8)));
8258 break;
8259 default:
8260 vassert(0);
8262 return res;
8266 /* Generate IR that takes a V128 and sign- or zero-widens
8267 either the even or odd lanes to twice-as-wide,
8268 resulting in a new V128 value. */
8269 static
8270 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
8271 UInt sizeNarrow, IRExpr* srcE )
8273 IRTemp src = newTempV128();
8274 IRTemp res = newTempV128();
8275 IROp opSAR = mkVecSARN(sizeNarrow+1);
8276 IROp opSHR = mkVecSHRN(sizeNarrow+1);
8277 IROp opSHL = mkVecSHLN(sizeNarrow+1);
8278 IROp opSxR = zWiden ? opSHR : opSAR;
8279 UInt amt = 0;
8280 switch (sizeNarrow) {
8281 case X10: amt = 32; break;
8282 case X01: amt = 16; break;
8283 case X00: amt = 8; break;
8284 default: vassert(0);
8286 assign(src, srcE);
8287 if (fromOdd) {
8288 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
8289 } else {
8290 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
8291 mkU8(amt)));
8293 return res;
8297 /* Generate IR that takes two V128s and narrows (takes lower half)
8298 of each lane, producing a single V128 value. */
8299 static
8300 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
8302 IRTemp res = newTempV128();
8303 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
8304 mkexpr(argHi), mkexpr(argLo)));
8305 return res;
8309 /* Return a temp which holds the vector dup of the lane of width
8310 (1 << size) obtained from src[laneNo]. */
8311 static
8312 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
8314 vassert(size <= 3);
8315 /* Normalise |laneNo| so it is of the form
8316 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
8317 This puts the bits we want to inspect at constant offsets
8318 regardless of the value of |size|.
8320 UInt ix = laneNo << size;
8321 vassert(ix <= 15);
8322 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
8323 switch (size) {
8324 case 0: /* B */
8325 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
8326 /* fallthrough */
8327 case 1: /* H */
8328 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
8329 /* fallthrough */
8330 case 2: /* S */
8331 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
8332 /* fallthrough */
8333 case 3: /* D */
8334 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
8335 break;
8336 default:
8337 vassert(0);
8339 IRTemp res = newTempV128();
8340 assign(res, src);
8341 Int i;
8342 for (i = 3; i >= 0; i--) {
8343 if (ops[i] == Iop_INVALID)
8344 break;
8345 IRTemp tmp = newTempV128();
8346 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
8347 res = tmp;
8349 return res;
8353 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
8354 selector encoded as shown below. Return a new V128 holding the
8355 selected lane from |srcV| dup'd out to V128, and also return the
8356 lane number, log2 of the lane size in bytes, and width-character via
8357 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
8358 is an invalid selector, in which case return
8359 IRTemp_INVALID, 0, 0 and '?' respectively.
8361 imm5 = xxxx1 signifies .b[xxxx]
8362 = xxx10 .h[xxx]
8363 = xx100 .s[xx]
8364 = x1000 .d[x]
8365 otherwise invalid
8367 static
8368 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
8369 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
8370 IRExpr* srcV, UInt imm5 )
8372 *laneNo = 0;
8373 *laneSzLg2 = 0;
8374 *laneCh = '?';
8376 if (imm5 & 1) {
8377 *laneNo = (imm5 >> 1) & 15;
8378 *laneSzLg2 = 0;
8379 *laneCh = 'b';
8381 else if (imm5 & 2) {
8382 *laneNo = (imm5 >> 2) & 7;
8383 *laneSzLg2 = 1;
8384 *laneCh = 'h';
8386 else if (imm5 & 4) {
8387 *laneNo = (imm5 >> 3) & 3;
8388 *laneSzLg2 = 2;
8389 *laneCh = 's';
8391 else if (imm5 & 8) {
8392 *laneNo = (imm5 >> 4) & 1;
8393 *laneSzLg2 = 3;
8394 *laneCh = 'd';
8396 else {
8397 /* invalid */
8398 return IRTemp_INVALID;
8401 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
8405 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
8406 static
8407 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
8409 IRType ty = Ity_INVALID;
8410 IRTemp rcS = IRTemp_INVALID;
8411 switch (size) {
8412 case X01:
8413 vassert(imm <= 0xFFFFULL);
8414 ty = Ity_I16;
8415 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
8416 break;
8417 case X10:
8418 vassert(imm <= 0xFFFFFFFFULL);
8419 ty = Ity_I32;
8420 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
8421 break;
8422 case X11:
8423 ty = Ity_I64;
8424 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
8425 default:
8426 vassert(0);
8428 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
8429 return rcV;
8433 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
8434 and the upper can contain any value -- it is ignored. If |is2| is False,
8435 generate IR to put |new64| in the lower half of vector reg |dd| and zero
8436 the upper half. If |is2| is True, generate IR to put |new64| in the upper
8437 half of vector reg |dd| and leave the lower half unchanged. This
8438 simulates the behaviour of the "foo/foo2" instructions in which the
8439 destination is half the width of sources, for example addhn/addhn2.
8441 static
8442 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
8444 if (is2) {
8445 /* Keep the lower half of the old contents of Vdd, and OR
8446 |new64| into the upper half. */
8447 IRTemp t_zero_oldLO = newTempV128();
8448 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
8449 IRTemp t_newHI_zero = newTempV128();
8450 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
8451 mkV128(0x0000)));
8452 IRTemp res = newTempV128();
8453 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
8454 mkexpr(t_newHI_zero)));
8455 putQReg128(dd, mkexpr(res));
8456 } else {
8457 /* This is simple. */
8458 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
8463 /* Compute vector SQABS at lane size |size| for |srcE|, returning
8464 the q result in |*qabs| and the normal result in |*nabs|. */
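/* Example of the q/normal split (illustrative only): for an 8-bit lane
   holding 0x80 (-128), the non-saturating result wraps back to 0x80
   while the saturating result is 0x7F; the caller detects saturation by
   comparing the two. */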
8465 static
8466 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
8467 IRExpr* srcE, UInt size )
8469 IRTemp src, mask, maskn, nsub, qsub;
8470 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
8471 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
8472 assign(src, srcE);
8473 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
8474 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
8475 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8476 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8477 assign(*nabs, binop(Iop_OrV128,
8478 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
8479 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8480 assign(*qabs, binop(Iop_OrV128,
8481 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
8482 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8486 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
8487 the q result in |*qneg| and the normal result in |*nneg|. */
8488 static
8489 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
8490 IRExpr* srcE, UInt size )
8492 IRTemp src = IRTemp_INVALID;
8493 newTempsV128_3(&src, nneg, qneg);
8494 assign(src, srcE);
8495 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8496 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8500 /* Zero all except the least significant lane of |srcE|, where |size|
8501 indicates the lane size in the usual way. */
8502 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
8504 vassert(size < 4);
8505 IRTemp t = newTempV128();
8506 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
8507 return t;
8511 /* Generate IR to compute vector widening MULL from either the lower
8512 (is2==False) or upper (is2==True) halves of vecN and vecM. The
8513 widening multiplies are unsigned when isU==True and signed when
8514 isU==False. |size| is the narrow lane size indication. Optionally,
8515 the product may be added to or subtracted from vecD, at the wide lane
8516 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
8517 is 'm' (only multiply) then the accumulate part does not happen, and
8518 |vecD| is expected to == IRTemp_INVALID.
8520 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
8521 are allowed. The result is placed in a new IRTemp, which is
8522 returned in *res. */
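/* For instance, size == 1 with isU == False, mas == 'a' and is2 == False
   computes, per lane, vecD.32 + (vecN.16 *s vecM.16) over the four lower
   16-bit lanes of vecN and vecM -- in effect the SMLAL Vd.4S, Vn.4H,
   Vm.4H pattern. */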
8523 static
8524 void math_MULL_ACC ( /*OUT*/IRTemp* res,
8525 Bool is2, Bool isU, UInt size, HChar mas,
8526 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8528 vassert(res && *res == IRTemp_INVALID);
8529 vassert(size <= 2);
8530 vassert(mas == 'm' || mas == 'a' || mas == 's');
8531 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
8532 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
8533 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
8534 : (mas == 's' ? mkVecSUB(size+1)
8535 : Iop_INVALID);
8536 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
8537 mkexpr(vecN), mkexpr(vecM));
8538 *res = newTempV128();
8539 assign(*res, mas == 'm' ? mkexpr(mul)
8540 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
8544 /* Same as math_MULL_ACC, except the multiply is signed widening,
8545 the multiplied value is then doubled, before being added to or
8546 subtracted from the accumulated value. And everything is
8547 saturated. In all cases, saturation residuals are returned
8548 via (sat1q, sat1n), and in the accumulate cases,
8549 via (sat2q, sat2n) too. All results are returned in new temporaries.
8550 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8551 so the caller can tell this has happened. */
8552 static
8553 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
8554 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8555 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
8556 Bool is2, UInt size, HChar mas,
8557 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8559 vassert(size <= 2);
8560 vassert(mas == 'm' || mas == 'a' || mas == 's');
8561 /* Compute
8562 sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
8563 sat1n = vecN.D[is2] *s vecM.D[is2] * 2
8564 IOW take either the low or high halves of vecN and vecM, signed widen,
8565 multiply, double that, and signedly saturate. Also compute the same
8566 but without saturation.
8568 vassert(sat2q && *sat2q == IRTemp_INVALID);
8569 vassert(sat2n && *sat2n == IRTemp_INVALID);
8570 newTempsV128_3(sat1q, sat1n, res);
8571 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
8572 mkexpr(vecN), mkexpr(vecM));
8573 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
8574 mkexpr(vecN), mkexpr(vecM));
8575 assign(*sat1q, mkexpr(tq));
8576 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
8578 /* If there is no accumulation, the final result is sat1q,
8579 and there's no assignment to sat2q or sat2n. */
8580 if (mas == 'm') {
8581 assign(*res, mkexpr(*sat1q));
8582 return;
8585 /* Compute
8586 sat2q = vecD +sq/-sq sat1q
8587 sat2n = vecD +/- sat1n
8588 result = sat2q
8590 newTempsV128_2(sat2q, sat2n);
8591 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
8592 mkexpr(vecD), mkexpr(*sat1q)));
8593 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
8594 mkexpr(vecD), mkexpr(*sat1n)));
8595 assign(*res, mkexpr(*sat2q));
8599 /* Generate IR for widening signed vector multiplies. The operands
8600 have their lane width signedly widened, and they are then multiplied
8601 at the wider width, returning results in two new IRTemps. */
8602 static
8603 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
8604 UInt sizeNarrow, IRTemp argL, IRTemp argR )
8606 vassert(sizeNarrow <= 2);
8607 newTempsV128_2(resHI, resLO);
8608 IRTemp argLhi = newTemp(Ity_I64);
8609 IRTemp argLlo = newTemp(Ity_I64);
8610 IRTemp argRhi = newTemp(Ity_I64);
8611 IRTemp argRlo = newTemp(Ity_I64);
8612 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
8613 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
8614 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
8615 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
8616 IROp opMulls = mkVecMULLS(sizeNarrow);
8617 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
8618 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
8622 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8623 double that, possibly add a rounding constant (R variants), and take
8624 the high half. */
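/* Worked example for 16-bit lanes (illustrative only): SQDMULH of 0x4000
   by 0x4000 doubles the product to 0x20000000 and keeps the high half,
   0x2000; for 0x8000 * 0x8000 the doubled product would be 0x80000000,
   so the saturating result is 0x7FFF while the non-saturating
   computation gives 0x8000 -- that difference is what later sets QC. */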
8625 static
8626 void math_SQDMULH ( /*OUT*/IRTemp* res,
8627 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8628 Bool isR, UInt size, IRTemp vN, IRTemp vM )
8630 vassert(size == X01 || size == X10); /* s or h only */
8632 newTempsV128_3(res, sat1q, sat1n);
8634 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
8635 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
8637 IROp addWide = mkVecADD(size+1);
8639 if (isR) {
8640 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8642 Int rcShift = size == X01 ? 15 : 31;
8643 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
8644 assign(*sat1n,
8645 binop(mkVecCATODDLANES(size),
8646 binop(addWide,
8647 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8648 mkexpr(roundConst)),
8649 binop(addWide,
8650 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
8651 mkexpr(roundConst))));
8652 } else {
8653 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8655 assign(*sat1n,
8656 binop(mkVecCATODDLANES(size),
8657 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8658 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
8661 assign(*res, mkexpr(*sat1q));
8664 /* Generate IR for SQRDMLAH and SQRDMLSH: signedly wideningly multiply,
8665 double, add a rounding constant, take the high half and accumulate. */
8666 static
8667 void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd,
8668 UInt size, IRTemp vD, IRTemp vN, IRTemp vM )
8670 vassert(size == X01 || size == X10); /* s or h only */
8672 /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */
8674 IRTemp mul, mul_nosat, dummy;
8675 mul = mul_nosat = dummy = IRTemp_INVALID;
8676 math_SQDMULH(&mul, &dummy, &mul_nosat, True/*R*/, size, vN, vM);
8678 IROp op = isAdd ? mkVecADD(size) : mkVecSUB(size);
8679 IROp qop = isAdd ? mkVecQADDS(size) : mkVecQSUBS(size);
8680 newTempsV128_2(res, res_nosat);
8681 assign(*res, binop(qop, mkexpr(vD), mkexpr(mul)));
8682 assign(*res_nosat, binop(op, mkexpr(vD), mkexpr(mul_nosat)));
8686 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
8687 a new temp in *res, and the Q difference pair in new temps in
8688 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
8689 three operations it is. */
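/* Example for "uqshl" on 8-bit lanes (illustrative only): shifting 0x40
   left by 2 cannot be represented, so *res saturates to 0xFF and
   *qDiff1 == 0x40 >> 6 == 1 (nonzero, so QC gets set); shifting 0x20 by
   2 gives 0x80 with *qDiff1 == 0, i.e. no saturation. */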
8690 static
8691 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
8692 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
8693 IRTemp src, UInt size, UInt shift, const HChar* nm )
8695 vassert(size <= 3);
8696 UInt laneBits = 8 << size;
8697 vassert(shift < laneBits);
8698 newTempsV128_3(res, qDiff1, qDiff2);
8699 IRTemp z128 = newTempV128();
8700 assign(z128, mkV128(0x0000));
8702 /* UQSHL */
8703 if (vex_streq(nm, "uqshl")) {
8704 IROp qop = mkVecQSHLNSATUU(size);
8705 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8706 if (shift == 0) {
8707 /* No shift means no saturation. */
8708 assign(*qDiff1, mkexpr(z128));
8709 assign(*qDiff2, mkexpr(z128));
8710 } else {
8711 /* Saturation has occurred if any of the shifted-out bits are
8712 nonzero. We get the shifted-out bits by right-shifting the
8713 original value. */
8714 UInt rshift = laneBits - shift;
8715 vassert(rshift >= 1 && rshift < laneBits);
8716 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8717 assign(*qDiff2, mkexpr(z128));
8719 return;
8722 /* SQSHL */
8723 if (vex_streq(nm, "sqshl")) {
8724 IROp qop = mkVecQSHLNSATSS(size);
8725 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8726 if (shift == 0) {
8727 /* No shift means no saturation. */
8728 assign(*qDiff1, mkexpr(z128));
8729 assign(*qDiff2, mkexpr(z128));
8730 } else {
8731 /* Saturation has occurred if any of the shifted-out bits are
8732 different from the top bit of the original value. */
8733 UInt rshift = laneBits - 1 - shift;
8734 vassert(rshift >= 0 && rshift < laneBits-1);
8735 /* qDiff1 is the shifted out bits, and the top bit of the original
8736 value, preceded by zeroes. */
8737 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8738 /* qDiff2 is the top bit of the original value, cloned the
8739 correct number of times. */
8740 assign(*qDiff2, binop(mkVecSHRN(size),
8741 binop(mkVecSARN(size), mkexpr(src),
8742 mkU8(laneBits-1)),
8743 mkU8(rshift)));
8744 /* This also succeeds in comparing the top bit of the original
8745 value to itself, which is a bit stupid, but not wrong. */
8747 return;
8750 /* SQSHLU */
8751 if (vex_streq(nm, "sqshlu")) {
8752 IROp qop = mkVecQSHLNSATSU(size);
8753 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8754 if (shift == 0) {
8755 /* If there's no shift, saturation depends on the top bit
8756 of the source. */
8757 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8758 assign(*qDiff2, mkexpr(z128));
8759 } else {
8760 /* Saturation has occurred if any of the shifted-out bits are
8761 nonzero. We get the shifted-out bits by right-shifting the
8762 original value. */
8763 UInt rshift = laneBits - shift;
8764 vassert(rshift >= 1 && rshift < laneBits);
8765 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8766 assign(*qDiff2, mkexpr(z128));
8768 return;
8771 vassert(0);
8775 /* Generate IR to do SRHADD and URHADD. */
8776 static
8777 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8779 /* Generate this:
8780       (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8781    */
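   /* Worked example, for illustration only: with 8-bit lanes, A == 5 and
      B == 6 give (5>>1) + (6>>1) + ((1 + 0 + 1)>>1) == 2 + 3 + 1 == 6,
      which equals (5 + 6 + 1) >> 1 without ever needing a 9-bit
      intermediate value. */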
8782 vassert(size <= 3);
8783 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8784 IROp opADD = mkVecADD(size);
8785 /* The only tricky bit is to generate the correct vector 1 constant. */
8786 const ULong ones64[4]
8787 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8788 0x0000000100000001ULL, 0x0000000000000001ULL };
8789 IRTemp imm64 = newTemp(Ity_I64);
8790 assign(imm64, mkU64(ones64[size]));
8791 IRTemp vecOne = newTempV128();
8792 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8793 IRTemp scaOne = newTemp(Ity_I8);
8794 assign(scaOne, mkU8(1));
8795 IRTemp res = newTempV128();
8796 assign(res,
8797 binop(opADD,
8798 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8799 binop(opADD,
8800 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8801 binop(opSHR,
8802 binop(opADD,
8803 binop(opADD,
8804 binop(Iop_AndV128, mkexpr(aa),
8805 mkexpr(vecOne)),
8806 binop(Iop_AndV128, mkexpr(bb),
8807                                               mkexpr(vecOne))
8808                                   ),
8809                                   mkexpr(vecOne)
8810                             ),
8811                       mkexpr(scaOne)
8812                 )
8813           )
8814        )
8815      );
8816 return res;
8820 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
8821 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8822 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8823 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8824 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8825    unmodified.  The presence of |opZHI| means this function can be used to
8826    generate QCFLAG update code for both scalar and vector SIMD operations.
8827 */
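/* For instance, for a saturating op on a scalar 32-bit lane, |opZHI| is
   Iop_ZeroHI96ofV128, so only bits 31:0 of (qres XOR nres) can make the
   sticky flag become nonzero; junk in the upper lanes is ignored. */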
8828 static
8829 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8831 IRTemp diff = newTempV128();
8832 IRTemp oldQCFLAG = newTempV128();
8833 IRTemp newQCFLAG = newTempV128();
8834 if (opZHI == Iop_INVALID) {
8835 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8836 } else {
8837 vassert(opZHI == Iop_ZeroHI64ofV128
8838 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8839 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8841 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8842 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8843 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8847 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8848 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8849 operations. */
8850 static
8851 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8853 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8857 /* Generate IR to rearrange two vector values in a way which is useful
8858 for doing S/D/H add-pair etc operations. There are 5 cases:
8860 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8862 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8864 8h: [m7 m6 m5 m4 m3 m2 m1 m0] [n7 n6 n5 n4 n3 n2 n1 n0] -->
8865 [m7 m5 n7 n5 m3 m1 n3 n1] [m6 m4 n6 n4 m2 m0 n2 n0]
8867 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8869 4h: [m7 m6 m5 m4 m3 m2 m1 m0] [n7 n6 n5 n4 n3 n2 n1 n0] -->
8870              [ 0  0  0  0 m3 m1 n3 n1] [ 0  0  0  0 m2 m0 n2 n0]
8871 */
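/* For example, in the 4s case a lane-wise FP add of the two rearranged
   values, [m3 m1 n3 n1] + [m2 m0 n2 n0] == [m3+m2  m1+m0  n3+n2  n1+n0],
   is exactly the lane layout that FADDP Vd.4s, Vn.4s, Vm.4s requires;
   the floating-point pairwise cases below rely on this. */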
8872 static
8873 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8874 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8875 IRTemp vecM, IRTemp vecN, ARM64VecESize sz, UInt bitQ
8878 vassert(rearrL && *rearrL == IRTemp_INVALID);
8879 vassert(rearrR && *rearrR == IRTemp_INVALID);
8880 *rearrL = newTempV128();
8881 *rearrR = newTempV128();
8883 switch (sz) {
8884 case ARM64VSizeD:
8885 // 2d case
8886 vassert(bitQ == 1);
8887 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8888 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8889 break;
8891 case ARM64VSizeS:
8892 if (bitQ == 1) {
8893 // 4s case
8894 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8895 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8896 } else {
8897 // 2s case
8898 IRTemp m1n1m0n0 = newTempV128();
8899 IRTemp m0n0m1n1 = newTempV128();
8900 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8901 mkexpr(vecM), mkexpr(vecN)));
8902 assign(m0n0m1n1, triop(Iop_SliceV128,
8903 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8904 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8905 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8907 break;
8909 case ARM64VSizeH:
8910 if (bitQ == 1) {
8911 // 8h case
8912 assign(*rearrL, binop(Iop_CatOddLanes16x8, mkexpr(vecM), mkexpr(vecN)));
8913 assign(*rearrR, binop(Iop_CatEvenLanes16x8, mkexpr(vecM), mkexpr(vecN)));
8914 } else {
8915 // 4h case
8916 IRTemp m3m1n3n1 = newTempV128();
8917 IRTemp m2m0n2n0 = newTempV128();
8918 assign(m3m1n3n1, binop(Iop_CatOddLanes16x8, mkexpr(vecM), mkexpr(vecN)));
8919 assign(m2m0n2n0, binop(Iop_CatEvenLanes16x8, mkexpr(vecM), mkexpr(vecN)));
8920 assign(*rearrL, unop(Iop_ZeroHI64ofV128,
8921 binop(Iop_CatEvenLanes32x4, mkexpr(m3m1n3n1),
8922 mkexpr(m3m1n3n1))));
8923 assign(*rearrR, unop(Iop_ZeroHI64ofV128,
8924 binop(Iop_CatEvenLanes32x4, mkexpr(m2m0n2n0),
8925 mkexpr(m2m0n2n0))));
8927 break;
8929 default: vpanic("math_REARRANGE_FOR_FLOATING_PAIRWISE");
8934 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
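/* This works by recursively halving the exponent, e.g.
      two_to_the_minus(5) == two_to_the_minus(2) * two_to_the_minus(3)
                          == 0.25 * 0.125 == 0.03125 == 2^-5.
   Every such value down to 2^-64 is exactly representable as a Double,
   so the recursion introduces no rounding error. */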
8935 static Double two_to_the_minus ( Int n )
8937 if (n == 1) return 0.5;
8938 vassert(n >= 2 && n <= 64);
8939 Int half = n / 2;
8940 return two_to_the_minus(half) * two_to_the_minus(n - half);
8944 /* Returns 2.0 ^ n for n in 1 .. 64 */
8945 static Double two_to_the_plus ( Int n )
8947 if (n == 1) return 2.0;
8948 vassert(n >= 2 && n <= 64);
8949 Int half = n / 2;
8950 return two_to_the_plus(half) * two_to_the_plus(n - half);
8954 /*------------------------------------------------------------*/
8955 /*--- SIMD and FP instructions ---*/
8956 /*------------------------------------------------------------*/
8958 static
8959 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8961 /* 31 29 23 21 20 15 14 10 9 4
8962 0 q 101110 op2 0 m 0 imm4 0 n d
8963       Decode fields: op2
8964    */
8965 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8966 if (INSN(31,31) != 0
8967 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8968 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8969 return False;
8971 UInt bitQ = INSN(30,30);
8972 UInt op2 = INSN(23,22);
8973 UInt mm = INSN(20,16);
8974 UInt imm4 = INSN(14,11);
8975 UInt nn = INSN(9,5);
8976 UInt dd = INSN(4,0);
8978 if (op2 == BITS2(0,0)) {
8979 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
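      /* For illustration: ext v0.16b, v1.16b, v2.16b, #3 yields bytes
         3..15 of v1 followed by bytes 0..2 of v2.  With sHi = Vm and
         sLo = Vn as below, Iop_SliceV128(sHi, sLo, 3) selects exactly
         that 16-byte window of the 256-bit value Vm:Vn. */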
8980 IRTemp sHi = newTempV128();
8981 IRTemp sLo = newTempV128();
8982 IRTemp res = newTempV128();
8983 assign(sHi, getQReg128(mm));
8984 assign(sLo, getQReg128(nn));
8985 if (bitQ == 1) {
8986 if (imm4 == 0) {
8987 assign(res, mkexpr(sLo));
8988 } else {
8989 vassert(imm4 >= 1 && imm4 <= 15);
8990 assign(res, triop(Iop_SliceV128,
8991 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
8993 putQReg128(dd, mkexpr(res));
8994 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8995 } else {
8996 if (imm4 >= 8) return False;
8997 if (imm4 == 0) {
8998 assign(res, mkexpr(sLo));
8999 } else {
9000 vassert(imm4 >= 1 && imm4 <= 7);
9001 IRTemp hi64lo64 = newTempV128();
9002 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
9003 mkexpr(sHi), mkexpr(sLo)));
9004 assign(res, triop(Iop_SliceV128,
9005 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
9007 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9008 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
9010 return True;
9013 return False;
9014 # undef INSN
9018 static
9019 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
9021 /* 31 29 23 21 20 15 14 12 11 9 4
9022 0 q 001110 op2 0 m 0 len op 00 n d
9023       Decode fields: op2,len,op
9024    */
9025 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9026 if (INSN(31,31) != 0
9027 || INSN(29,24) != BITS6(0,0,1,1,1,0)
9028 || INSN(21,21) != 0
9029 || INSN(15,15) != 0
9030 || INSN(11,10) != BITS2(0,0)) {
9031 return False;
9033 UInt bitQ = INSN(30,30);
9034 UInt op2 = INSN(23,22);
9035 UInt mm = INSN(20,16);
9036 UInt len = INSN(14,13);
9037 UInt bitOP = INSN(12,12);
9038 UInt nn = INSN(9,5);
9039 UInt dd = INSN(4,0);
9041 if (op2 == X00) {
9042 /* -------- 00,xx,0 TBL, xx register table -------- */
9043 /* -------- 00,xx,1 TBX, xx register table -------- */
9044 /* 31 28 20 15 14 12 9 4
9045 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
9046 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
9047          where Ta = 16b(q=1) or 8b(q=0)
9048       */
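      /* For illustration: with len == 1 the table is the 32 bytes of
         {Vn, V(n+1)}.  An index byte in Vm of 32 or more is out of range
         and fetches the corresponding byte of |oor_values| below: zero
         for TBL, or the existing Vd byte for TBX. */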
9049 Bool isTBX = bitOP == 1;
9050 /* The out-of-range values to use. */
9051 IRTemp oor_values = newTempV128();
9052 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
9053 /* src value */
9054 IRTemp src = newTempV128();
9055 assign(src, getQReg128(mm));
9056 /* The table values */
9057 IRTemp tab[4];
9058 UInt i;
9059 for (i = 0; i <= len; i++) {
9060 vassert(i < 4);
9061 tab[i] = newTempV128();
9062 assign(tab[i], getQReg128((nn + i) % 32));
9064 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
9065 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9066 const HChar* Ta = bitQ ==1 ? "16b" : "8b";
9067 const HChar* nm = isTBX ? "tbx" : "tbl";
9068 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
9069 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
9070 return True;
9073 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9074 return False;
9075 # undef INSN
9079 static
9080 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
9082 /* 31 29 23 21 20 15 14 11 9 4
9083 0 q 001110 size 0 m 0 opcode 10 n d
9084       Decode fields: opcode
9085    */
9086 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9087 if (INSN(31,31) != 0
9088 || INSN(29,24) != BITS6(0,0,1,1,1,0)
9089 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
9090 return False;
9092 UInt bitQ = INSN(30,30);
9093 UInt size = INSN(23,22);
9094 UInt mm = INSN(20,16);
9095 UInt opcode = INSN(14,12);
9096 UInt nn = INSN(9,5);
9097 UInt dd = INSN(4,0);
9099 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
9100 /* -------- 001 UZP1 std7_std7_std7 -------- */
9101 /* -------- 101 UZP2 std7_std7_std7 -------- */
9102 if (bitQ == 0 && size == X11) return False; // implied 1d case
9103 Bool isUZP1 = opcode == BITS3(0,0,1);
9104 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
9105 : mkVecCATODDLANES(size);
9106 IRTemp preL = newTempV128();
9107 IRTemp preR = newTempV128();
9108 IRTemp res = newTempV128();
9109 if (bitQ == 0) {
9110 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
9111 getQReg128(nn)));
9112 assign(preR, mkexpr(preL));
9113 } else {
9114 assign(preL, getQReg128(mm));
9115 assign(preR, getQReg128(nn));
9117 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
9118 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9119 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
9120 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9121 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9122 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9123 return True;
9126 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
9127 /* -------- 010 TRN1 std7_std7_std7 -------- */
9128 /* -------- 110 TRN2 std7_std7_std7 -------- */
9129 if (bitQ == 0 && size == X11) return False; // implied 1d case
9130 Bool isTRN1 = opcode == BITS3(0,1,0);
9131 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
9132 : mkVecCATODDLANES(size);
9133 IROp op2 = mkVecINTERLEAVEHI(size);
9134 IRTemp srcM = newTempV128();
9135 IRTemp srcN = newTempV128();
9136 IRTemp res = newTempV128();
9137 assign(srcM, getQReg128(mm));
9138 assign(srcN, getQReg128(nn));
9139 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
9140 binop(op1, mkexpr(srcN), mkexpr(srcN))));
9141 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9142 const HChar* nm = isTRN1 ? "trn1" : "trn2";
9143 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9144 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9145 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9146 return True;
9149 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
9150 /* -------- 011 ZIP1 std7_std7_std7 -------- */
9151 /* -------- 111 ZIP2 std7_std7_std7 -------- */
9152 if (bitQ == 0 && size == X11) return False; // implied 1d case
9153 Bool isZIP1 = opcode == BITS3(0,1,1);
9154 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
9155 : mkVecINTERLEAVEHI(size);
9156 IRTemp preL = newTempV128();
9157 IRTemp preR = newTempV128();
9158 IRTemp res = newTempV128();
9159 if (bitQ == 0 && !isZIP1) {
9160 IRTemp z128 = newTempV128();
9161 assign(z128, mkV128(0x0000));
9162 // preL = Vm shifted left 32 bits
9163 // preR = Vn shifted left 32 bits
9164 assign(preL, triop(Iop_SliceV128,
9165 getQReg128(mm), mkexpr(z128), mkU8(12)));
9166 assign(preR, triop(Iop_SliceV128,
9167 getQReg128(nn), mkexpr(z128), mkU8(12)));
9169 } else {
9170 assign(preL, getQReg128(mm));
9171 assign(preR, getQReg128(nn));
9173 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
9174 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9175 const HChar* nm = isZIP1 ? "zip1" : "zip2";
9176 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9177 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9178 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9179 return True;
9182 return False;
9183 # undef INSN
9187 static
9188 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
9190 /* 31 28 23 21 16 11 9 4
9191 0 q u 01110 size 11000 opcode 10 n d
9192       Decode fields: u,size,opcode
9193    */
9194 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9195 if (INSN(31,31) != 0
9196 || INSN(28,24) != BITS5(0,1,1,1,0)
9197 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
9198 return False;
9200 UInt bitQ = INSN(30,30);
9201 UInt bitU = INSN(29,29);
9202 UInt size = INSN(23,22);
9203 UInt opcode = INSN(16,12);
9204 UInt nn = INSN(9,5);
9205 UInt dd = INSN(4,0);
9207 if (opcode == BITS5(0,0,0,1,1)) {
9208 /* -------- 0,xx,00011 SADDLV -------- */
9209 /* -------- 1,xx,00011 UADDLV -------- */
9210 /* size is the narrow size */
9211 if (size == X11 || (size == X10 && bitQ == 0)) return False;
9212 Bool isU = bitU == 1;
9213 IRTemp src = newTempV128();
9214 assign(src, getQReg128(nn));
9215 /* The basic plan is to widen the lower half, and if Q = 1,
9216 the upper half too. Add them together (if Q = 1), and in
9217          either case fold with add at twice the lane width.
9218       */
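      /* For illustration: uaddlv h0, v1.16b widens the low 8 bytes and the
         high 8 bytes of v1 to two 8h vectors, adds them, and the fold below
         then sums the eight halfword lanes into lane 0.  The total of
         sixteen bytes is at most 16 * 255 == 4080, so the 16-bit lanes
         cannot overflow. */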
9219 IRExpr* widened
9220 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
9221 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
9222 if (bitQ == 1) {
9223 widened
9224 = binop(mkVecADD(size+1),
9225 widened,
9226 mkexpr(math_WIDEN_LO_OR_HI_LANES(
9227 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
9230 /* Now fold. */
9231 IRTemp tWi = newTempV128();
9232 assign(tWi, widened);
9233 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
9234 putQReg128(dd, mkexpr(res));
9235 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9236 const HChar ch = "bhsd"[size];
9237 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
9238 nameQReg128(dd), ch, nameQReg128(nn), arr);
9239 return True;
9242 UInt ix = 0;
9243 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
9244 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
9245 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
9246 /**/
9247 if (ix != 0) {
9248 /* -------- 0,xx,01010: SMAXV -------- (1) */
9249 /* -------- 1,xx,01010: UMAXV -------- (2) */
9250 /* -------- 0,xx,11010: SMINV -------- (3) */
9251 /* -------- 1,xx,11010: UMINV -------- (4) */
9252 /* -------- 0,xx,11011: ADDV -------- (5) */
9253 vassert(ix >= 1 && ix <= 5);
9254 if (size == X11) return False; // 1d,2d cases not allowed
9255 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
9256 const IROp opMAXS[3]
9257 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
9258 const IROp opMAXU[3]
9259 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
9260 const IROp opMINS[3]
9261 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
9262 const IROp opMINU[3]
9263 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
9264 const IROp opADD[3]
9265 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
9266 vassert(size < 3);
9267 IROp op = Iop_INVALID;
9268 const HChar* nm = NULL;
9269 switch (ix) {
9270 case 1: op = opMAXS[size]; nm = "smaxv"; break;
9271 case 2: op = opMAXU[size]; nm = "umaxv"; break;
9272 case 3: op = opMINS[size]; nm = "sminv"; break;
9273 case 4: op = opMINU[size]; nm = "uminv"; break;
9274 case 5: op = opADD[size]; nm = "addv"; break;
9275 default: vassert(0);
9277 vassert(op != Iop_INVALID && nm != NULL);
9278 IRTemp tN1 = newTempV128();
9279 assign(tN1, getQReg128(nn));
9280 /* If Q == 0, we're just folding lanes in the lower half of
9281 the value. In which case, copy the lower half of the
9282 source into the upper half, so we can then treat it the
9283 same as the full width case. Except for the addition case,
9284 in which we have to zero out the upper half. */
9285 IRTemp tN2 = newTempV128();
9286 assign(tN2, bitQ == 0
9287 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
9288 : mk_CatEvenLanes64x2(tN1,tN1))
9289 : mkexpr(tN1));
9290 IRTemp res = math_FOLDV(tN2, op);
9291 if (res == IRTemp_INVALID)
9292 return False; /* means math_FOLDV
9293 doesn't handle this case yet */
9294 putQReg128(dd, mkexpr(res));
9295 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
9296 IRType laneTy = tys[size];
9297 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9298 DIP("%s %s, %s.%s\n", nm,
9299 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
9300 return True;
9303 if ((size == X00 || size == X10)
9304 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9305       /* -------- 0,00,01100: FMAXNMV s_4s -------- */
9306       /* -------- 0,10,01100: FMINNMV s_4s -------- */
9307 /* -------- 1,00,01111: FMAXV s_4s -------- */
9308 /* -------- 1,10,01111: FMINV s_4s -------- */
9309 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9310 if (bitQ == 0) return False; // Only 4s is allowed
9311 Bool isMIN = (size & 2) == 2;
9312 Bool isNM = opcode == BITS5(0,1,1,0,0);
9313 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
9314 IRTemp src = newTempV128();
9315 assign(src, getQReg128(nn));
9316 IRTemp res = math_FOLDV(src, opMXX);
9317 putQReg128(dd, mkexpr(res));
9318       DIP("%s%sv s%u, v%u.4s\n",
9319 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
9320 return True;
9323 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9324 return False;
9325 # undef INSN
9329 static
9330 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9332 /* 31 28 20 15 14 10 9 4
9333 0 q op 01110000 imm5 0 imm4 1 n d
9334       Decode fields: q,op,imm4
9335    */
9336 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9337 if (INSN(31,31) != 0
9338 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
9339 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9340 return False;
9342 UInt bitQ = INSN(30,30);
9343 UInt bitOP = INSN(29,29);
9344 UInt imm5 = INSN(20,16);
9345 UInt imm4 = INSN(14,11);
9346 UInt nn = INSN(9,5);
9347 UInt dd = INSN(4,0);
9349 /* -------- x,0,0000: DUP (element, vector) -------- */
9350 /* 31 28 20 15 9 4
9351       0q0 01110000 imm5 000001 n d   DUP Vd.T, Vn.Ts[index]
9352    */
9353 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9354 UInt laneNo = 0;
9355 UInt laneSzLg2 = 0;
9356 HChar laneCh = '?';
9357 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
9358 getQReg128(nn), imm5);
9359 if (res == IRTemp_INVALID)
9360 return False;
9361 if (bitQ == 0 && laneSzLg2 == X11)
9362 return False; /* .1d case */
9363 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9364 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
9365 DIP("dup %s.%s, %s.%c[%u]\n",
9366 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
9367 return True;
9370 /* -------- x,0,0001: DUP (general, vector) -------- */
9371 /* 31 28 20 15 9 4
9372 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
9373 Q=0 writes 64, Q=1 writes 128
9374 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
9375 xxx10 4H(q=0) or 8H(q=1), R=W
9376 xx100 2S(q=0) or 4S(q=1), R=W
9377 x1000 Invalid(q=0) or 2D(q=1), R=X
9378 x0000 Invalid(q=0) or Invalid(q=1)
9379       Require op=0, imm4=0001
9380    */
9381 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
9382 Bool isQ = bitQ == 1;
9383 IRTemp w0 = newTemp(Ity_I64);
9384 const HChar* arT = "??";
9385 IRType laneTy = Ity_INVALID;
9386 if (imm5 & 1) {
9387 arT = isQ ? "16b" : "8b";
9388 laneTy = Ity_I8;
9389 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
9391 else if (imm5 & 2) {
9392 arT = isQ ? "8h" : "4h";
9393 laneTy = Ity_I16;
9394 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
9396 else if (imm5 & 4) {
9397 arT = isQ ? "4s" : "2s";
9398 laneTy = Ity_I32;
9399 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
9401 else if ((imm5 & 8) && isQ) {
9402 arT = "2d";
9403 laneTy = Ity_I64;
9404 assign(w0, getIReg64orZR(nn));
9406 else {
9407 /* invalid; leave laneTy unchanged. */
9409 /* */
9410 if (laneTy != Ity_INVALID) {
9411 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
9412 putQReg128(dd, binop(Iop_64HLtoV128,
9413 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
9414 DIP("dup %s.%s, %s\n",
9415 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
9416 return True;
9418 /* invalid */
9419 return False;
9422 /* -------- 1,0,0011: INS (general) -------- */
9423 /* 31 28 20 15 9 4
9424 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
9425 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
9426 xxx10 -> H, xxx
9427 xx100 -> S, xx
9428                                  x1000 -> D, x
9429    */
9430 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
9431 HChar ts = '?';
9432 UInt laneNo = 16;
9433 IRExpr* src = NULL;
9434 if (imm5 & 1) {
9435 src = unop(Iop_64to8, getIReg64orZR(nn));
9436 laneNo = (imm5 >> 1) & 15;
9437 ts = 'b';
9439 else if (imm5 & 2) {
9440 src = unop(Iop_64to16, getIReg64orZR(nn));
9441 laneNo = (imm5 >> 2) & 7;
9442 ts = 'h';
9444 else if (imm5 & 4) {
9445 src = unop(Iop_64to32, getIReg64orZR(nn));
9446 laneNo = (imm5 >> 3) & 3;
9447 ts = 's';
9449 else if (imm5 & 8) {
9450 src = getIReg64orZR(nn);
9451 laneNo = (imm5 >> 4) & 1;
9452 ts = 'd';
9454 /* */
9455 if (src) {
9456 vassert(laneNo < 16);
9457 putQRegLane(dd, laneNo, src);
9458 DIP("ins %s.%c[%u], %s\n",
9459 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
9460 return True;
9462 /* invalid */
9463 return False;
9466 /* -------- x,0,0101: SMOV -------- */
9467 /* -------- x,0,0111: UMOV -------- */
9468 /* 31 28 20 15 9 4
9469 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
9470 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
9471 dest is Xd when q==1, Wd when q==0
9472 UMOV:
9473 Ts,index,ops = case q:imm5 of
9474 0:xxxx1 -> B, xxxx, 8Uto64
9475 1:xxxx1 -> invalid
9476 0:xxx10 -> H, xxx, 16Uto64
9477 1:xxx10 -> invalid
9478 0:xx100 -> S, xx, 32Uto64
9479 1:xx100 -> invalid
9480 1:x1000 -> D, x, copy64
9481 other -> invalid
9482 SMOV:
9483 Ts,index,ops = case q:imm5 of
9484 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
9485 1:xxxx1 -> B, xxxx, 8Sto64
9486 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
9487 1:xxx10 -> H, xxx, 16Sto64
9488 0:xx100 -> invalid
9489 1:xx100 -> S, xx, 32Sto64
9490 1:x1000 -> invalid
9491                      other  -> invalid
9492    */
9493 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
9494 Bool isU = (imm4 & 2) == 2;
9495 const HChar* arTs = "??";
9496 UInt laneNo = 16; /* invalid */
9497 // Setting 'res' to non-NULL determines valid/invalid
9498 IRExpr* res = NULL;
9499 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
9500 laneNo = (imm5 >> 1) & 15;
9501 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9502 res = isU ? unop(Iop_8Uto64, lane)
9503 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
9504 arTs = "b";
9506 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
9507 laneNo = (imm5 >> 1) & 15;
9508 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9509 res = isU ? NULL
9510 : unop(Iop_8Sto64, lane);
9511 arTs = "b";
9513 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
9514 laneNo = (imm5 >> 2) & 7;
9515 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9516 res = isU ? unop(Iop_16Uto64, lane)
9517 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
9518 arTs = "h";
9520 else if (bitQ && (imm5 & 2)) { // 1:xxx10
9521 laneNo = (imm5 >> 2) & 7;
9522 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9523 res = isU ? NULL
9524 : unop(Iop_16Sto64, lane);
9525 arTs = "h";
9527 else if (!bitQ && (imm5 & 4)) { // 0:xx100
9528 laneNo = (imm5 >> 3) & 3;
9529 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9530 res = isU ? unop(Iop_32Uto64, lane)
9531 : NULL;
9532 arTs = "s";
9534       else if (bitQ && (imm5 & 4)) { // 1:xx100
9535 laneNo = (imm5 >> 3) & 3;
9536 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9537 res = isU ? NULL
9538 : unop(Iop_32Sto64, lane);
9539 arTs = "s";
9541 else if (bitQ && (imm5 & 8)) { // 1:x1000
9542 laneNo = (imm5 >> 4) & 1;
9543 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
9544 res = isU ? lane
9545 : NULL;
9546 arTs = "d";
9548 /* */
9549 if (res) {
9550 vassert(laneNo < 16);
9551 putIReg64orZR(dd, res);
9552 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
9553 nameIRegOrZR(bitQ == 1, dd),
9554 nameQReg128(nn), arTs, laneNo);
9555 return True;
9557 /* invalid */
9558 return False;
9561 /* -------- 1,1,xxxx: INS (element) -------- */
9562 /* 31 28 20 14 9 4
9563 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
9564 where Ts,ix1,ix2
9565 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
9566 xxx10 -> H, xxx, imm4[3:1]
9567 xx100 -> S, xx, imm4[3:2]
9568                         x1000 -> D, x,    imm4[3:3]
9569    */
9570 if (bitQ == 1 && bitOP == 1) {
9571 HChar ts = '?';
9572 IRType ity = Ity_INVALID;
9573 UInt ix1 = 16;
9574 UInt ix2 = 16;
9575 if (imm5 & 1) {
9576 ts = 'b';
9577 ity = Ity_I8;
9578 ix1 = (imm5 >> 1) & 15;
9579 ix2 = (imm4 >> 0) & 15;
9581 else if (imm5 & 2) {
9582 ts = 'h';
9583 ity = Ity_I16;
9584 ix1 = (imm5 >> 2) & 7;
9585 ix2 = (imm4 >> 1) & 7;
9587 else if (imm5 & 4) {
9588 ts = 's';
9589 ity = Ity_I32;
9590 ix1 = (imm5 >> 3) & 3;
9591 ix2 = (imm4 >> 2) & 3;
9593 else if (imm5 & 8) {
9594 ts = 'd';
9595 ity = Ity_I64;
9596 ix1 = (imm5 >> 4) & 1;
9597 ix2 = (imm4 >> 3) & 1;
9599 /* */
9600 if (ity != Ity_INVALID) {
9601 vassert(ix1 < 16);
9602 vassert(ix2 < 16);
9603 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
9604 DIP("ins %s.%c[%u], %s.%c[%u]\n",
9605 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
9606 return True;
9608 /* invalid */
9609 return False;
9612 return False;
9613 # undef INSN
9617 static
9618 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
9620 /* 31 28 18 15 11 9 4
9621 0q op 01111 00000 abc cmode 01 defgh d
9622 Decode fields: q,op,cmode
9623       Bit 11 is really "o2", but it is always zero.
9624    */
9625 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9626 if (INSN(31,31) != 0
9627 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
9628 || INSN(11,10) != BITS2(0,1)) {
9629 return False;
9631 UInt bitQ = INSN(30,30);
9632 UInt bitOP = INSN(29,29);
9633 UInt cmode = INSN(15,12);
9634 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
9635 UInt dd = INSN(4,0);
9637 ULong imm64lo = 0;
9638 UInt op_cmode = (bitOP << 4) | cmode;
9639 Bool ok = False;
9640 Bool isORR = False;
9641 Bool isBIC = False;
9642 Bool isMOV = False;
9643 Bool isMVN = False;
9644 Bool isFMOV = False;
9645 switch (op_cmode) {
9646 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
9647 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
9648 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9649 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9650 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9651 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9652 ok = True; isMOV = True; break;
9654 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9655 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9656 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9657 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9658 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9659 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9660 ok = True; isORR = True; break;
9662 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9663 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9664 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9665 ok = True; isMOV = True; break;
9667 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9668 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9669 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9670 ok = True; isORR = True; break;
9672 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9673 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9674 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9675 ok = True; isMOV = True; break;
9677 /* -------- x,0,1110 MOVI 8-bit -------- */
9678 case BITS5(0,1,1,1,0):
9679 ok = True; isMOV = True; break;
9681 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9682 case BITS5(0,1,1,1,1): // 0:1111
9683 ok = True; isFMOV = True; break;
9685 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9686 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
9687 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
9688 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
9689 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9690 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9691 ok = True; isMVN = True; break;
9693 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9694 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9695 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9696 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9697 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9698 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9699 ok = True; isBIC = True; break;
9701 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9702 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9703 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
9704 ok = True; isMVN = True; break;
9706 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
9707 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
9708 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
9709 ok = True; isBIC = True; break;
9711 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
9712 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
9713 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
9714 ok = True; isMVN = True; break;
9716 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
9717 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
9718 case BITS5(1,1,1,1,0):
9719 ok = True; isMOV = True; break;
9721 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
9722 case BITS5(1,1,1,1,1): // 1:1111
9723 ok = bitQ == 1; isFMOV = True; break;
9725 default:
9726 break;
9728 if (ok) {
9729 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
9730 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
9731 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
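      /* For illustration: op == 0, cmode == 0010 (MOVI, 32-bit shifted
         immediate, LSL #8) with abcdefgh == 0xAB expands to the 64-bit
         replication 0x0000AB000000AB00, which then supplies one or both
         halves of the result below. */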
9733 if (ok) {
9734 if (isORR || isBIC) {
9735 ULong inv
9736 = isORR ? 0ULL : ~0ULL;
9737 IRExpr* immV128
9738 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
9739 IRExpr* res
9740 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
9741 const HChar* nm = isORR ? "orr" : "bic";
9742 if (bitQ == 0) {
9743 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
9744 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
9745 } else {
9746 putQReg128(dd, res);
9747 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
9748 nameQReg128(dd), imm64lo, imm64lo);
9751 else if (isMOV || isMVN || isFMOV) {
9752 if (isMVN) imm64lo = ~imm64lo;
9753 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
9754 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
9755 mkU64(imm64lo));
9756 putQReg128(dd, immV128);
9757 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
9759 return True;
9761 /* else fall through */
9763 return False;
9764 # undef INSN
9768 static
9769 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9771 /* 31 28 20 15 14 10 9 4
9772 01 op 11110000 imm5 0 imm4 1 n d
9773       Decode fields: op,imm4
9774    */
9775 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9776 if (INSN(31,30) != BITS2(0,1)
9777 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9778 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9779 return False;
9781 UInt bitOP = INSN(29,29);
9782 UInt imm5 = INSN(20,16);
9783 UInt imm4 = INSN(14,11);
9784 UInt nn = INSN(9,5);
9785 UInt dd = INSN(4,0);
9787 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9788 /* -------- 0,0000 DUP (element, scalar) -------- */
9789 IRTemp w0 = newTemp(Ity_I64);
9790 const HChar* arTs = "??";
9791 IRType laneTy = Ity_INVALID;
9792 UInt laneNo = 16; /* invalid */
9793 if (imm5 & 1) {
9794 arTs = "b";
9795 laneNo = (imm5 >> 1) & 15;
9796 laneTy = Ity_I8;
9797 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9799 else if (imm5 & 2) {
9800 arTs = "h";
9801 laneNo = (imm5 >> 2) & 7;
9802 laneTy = Ity_I16;
9803 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9805 else if (imm5 & 4) {
9806 arTs = "s";
9807 laneNo = (imm5 >> 3) & 3;
9808 laneTy = Ity_I32;
9809 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9811 else if (imm5 & 8) {
9812 arTs = "d";
9813 laneNo = (imm5 >> 4) & 1;
9814 laneTy = Ity_I64;
9815 assign(w0, getQRegLane(nn, laneNo, laneTy));
9817 else {
9818 /* invalid; leave laneTy unchanged. */
9820 /* */
9821 if (laneTy != Ity_INVALID) {
9822 vassert(laneNo < 16);
9823 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9824 DIP("dup %s, %s.%s[%u]\n",
9825 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9826 return True;
9828 /* else fall through */
9831 return False;
9832 # undef INSN
9836 static
9837 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn,
9838 const VexArchInfo* archinfo)
9840 /* 31 28 23 21 16 11 9 4
9841 01 u 11110 sz 11000 opcode 10 n d
9842       Decode fields: u,sz,opcode
9843    */
9844 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9845 if (INSN(31,30) != BITS2(0,1)
9846 || INSN(28,24) != BITS5(1,1,1,1,0)
9847 || INSN(21,17) != BITS5(1,1,0,0,0)
9848 || INSN(11,10) != BITS2(1,0)) {
9849 return False;
9851 UInt bitU = INSN(29,29);
9852 UInt sz = INSN(23,22);
9853 UInt opcode = INSN(16,12);
9854 UInt nn = INSN(9,5);
9855 UInt dd = INSN(4,0);
9857 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9858 /* -------- 0,11,11011 ADDP d_2d -------- */
9859 IRTemp xy = newTempV128();
9860 IRTemp xx = newTempV128();
9861 assign(xy, getQReg128(nn));
9862 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9863 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9864 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9865 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9866 return True;
9869 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9870       /* -------- 1,00,01101 FADDP s_2s -------- */
9871       /* -------- 1,01,01101 FADDP d_2d -------- */
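      /* For illustration, in the s_2s case: |src| holds [.. .. s1 s0];
         argL becomes [0 0 0 s0], and Iop_SliceV128(src, src, 4) shifts the
         vector down by 4 bytes so that argR becomes [0 0 0 s1].  The FP
         add then leaves s1 + s0 in lane 0, which is the FADDP result. */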
9872 Bool isD = sz == X01;
9873 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9874 IROp opADD = mkVecADDF(isD ? 3 : 2);
9875 IRTemp src = newTempV128();
9876 IRTemp argL = newTempV128();
9877 IRTemp argR = newTempV128();
9878 assign(src, getQReg128(nn));
9879 assign(argL, unop(opZHI, mkexpr(src)));
9880 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9881 mkU8(isD ? 8 : 4))));
9882 putQReg128(dd, unop(opZHI,
9883 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9884 mkexpr(argL), mkexpr(argR))));
9885 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9886 return True;
9889 /* Half-precision floating point ADDP (v8.2). */
9890 if (bitU == 0 && sz <= X00 && opcode == BITS5(0,1,1,0,1)) {
9891       /* -------- 0,00,01101 FADDP h_2h -------- */
9892 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
9893 return False;
9894 IROp opZHI = mkVecZEROHIxxOFV128(1);
9895 IROp opADD = mkVecADDF(1);
9896 IRTemp src = newTempV128();
9897 IRTemp argL = newTempV128();
9898 IRTemp argR = newTempV128();
9899 assign(src, getQReg128(nn));
9900 assign(argL, unop(opZHI, mkexpr(src)));
9901 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9902 mkU8(2))));
9903 putQReg128(dd, unop(opZHI,
9904 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9905 mkexpr(argL), mkexpr(argR))));
9906 DIP("faddp h%u, v%u.2h\n", dd, nn);
9907 return True;
9910 if (bitU == 1
9911 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9912 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9913 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9914 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9915 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9916 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9917 Bool isD = (sz & 1) == 1;
9918 Bool isMIN = (sz & 2) == 2;
9919 Bool isNM = opcode == BITS5(0,1,1,0,0);
9920 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9921 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9922 IRTemp src = newTempV128();
9923 IRTemp argL = newTempV128();
9924 IRTemp argR = newTempV128();
9925 assign(src, getQReg128(nn));
9926 assign(argL, unop(opZHI, mkexpr(src)));
9927 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9928 mkU8(isD ? 8 : 4))));
9929 putQReg128(dd, unop(opZHI,
9930 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9931 HChar c = isD ? 'd' : 's';
9932 DIP("%s%sp %c%u, v%u.2%c\n",
9933 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9934 return True;
9937 return False;
9938 # undef INSN
9942 static
9943 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9945 /* 31 28 22 18 15 10 9 4
9946 01 u 111110 immh immb opcode 1 n d
9947       Decode fields: u,immh,opcode
9948    */
9949 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9950 if (INSN(31,30) != BITS2(0,1)
9951 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9952 return False;
9954 UInt bitU = INSN(29,29);
9955 UInt immh = INSN(22,19);
9956 UInt immb = INSN(18,16);
9957 UInt opcode = INSN(15,11);
9958 UInt nn = INSN(9,5);
9959 UInt dd = INSN(4,0);
9960 UInt immhb = (immh << 3) | immb;
9962 if ((immh & 8) == 8
9963 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9964 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9965 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9966 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9967 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9968 Bool isU = bitU == 1;
9969 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9970 UInt sh = 128 - immhb;
9971 vassert(sh >= 1 && sh <= 64);
9972 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9973 IRExpr* src = getQReg128(nn);
9974 IRTemp shf = newTempV128();
9975 IRTemp res = newTempV128();
9976 if (sh == 64 && isU) {
9977 assign(shf, mkV128(0x0000));
9978 } else {
9979 UInt nudge = 0;
9980 if (sh == 64) {
9981 vassert(!isU);
9982 nudge = 1;
9984 assign(shf, binop(op, src, mkU8(sh - nudge)));
9986 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9987 : mkexpr(shf));
9988 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9989 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9990 : (isU ? "ushr" : "sshr");
9991 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9992 return True;
9995 if ((immh & 8) == 8
9996 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9997 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9998 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9999 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
10000 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
10001 Bool isU = bitU == 1;
10002 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10003 UInt sh = 128 - immhb;
10004 vassert(sh >= 1 && sh <= 64);
10005 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
10006 vassert(sh >= 1 && sh <= 64);
10007 IRExpr* src = getQReg128(nn);
10008 IRTemp imm8 = newTemp(Ity_I8);
10009 assign(imm8, mkU8((UChar)(-sh)));
10010 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10011 IRTemp shf = newTempV128();
10012 IRTemp res = newTempV128();
10013 assign(shf, binop(op, src, amt));
10014 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
10015 : mkexpr(shf));
10016 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10017 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10018 : (isU ? "urshr" : "srshr");
10019 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
10020 return True;
10023 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
10024 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
10025 UInt sh = 128 - immhb;
10026 vassert(sh >= 1 && sh <= 64);
10027 if (sh == 64) {
10028 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
10029 } else {
10030 /* sh is in range 1 .. 63 */
10031 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
10032 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
10033 IRTemp res = newTempV128();
10034 assign(res, binop(Iop_OrV128,
10035 binop(Iop_AndV128, getQReg128(dd), nmaskV),
10036 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
10037 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10039 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
10040 return True;
10043 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
10044 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
10045 UInt sh = immhb - 64;
10046 vassert(sh >= 0 && sh < 64);
10047 putQReg128(dd,
10048 unop(Iop_ZeroHI64ofV128,
10049 sh == 0 ? getQReg128(nn)
10050 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
10051 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
10052 return True;
10055 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
10056 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
10057 UInt sh = immhb - 64;
10058 vassert(sh >= 0 && sh < 64);
10059 if (sh == 0) {
10060 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
10061 } else {
10062 /* sh is in range 1 .. 63 */
10063 ULong nmask = (1ULL << sh) - 1;
10064 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
10065 IRTemp res = newTempV128();
10066 assign(res, binop(Iop_OrV128,
10067 binop(Iop_AndV128, getQReg128(dd), nmaskV),
10068 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
10069 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10071 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
10072 return True;
10075 if (opcode == BITS5(0,1,1,1,0)
10076 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10077 /* -------- 0,01110 SQSHL #imm -------- */
10078 /* -------- 1,01110 UQSHL #imm -------- */
10079 /* -------- 1,01100 SQSHLU #imm -------- */
10080 UInt size = 0;
10081 UInt shift = 0;
10082 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10083 if (!ok) return False;
10084 vassert(size >= 0 && size <= 3);
10085 /* The shift encoding has opposite sign for the leftwards case.
10086 Adjust shift to compensate. */
10087 UInt lanebits = 8 << size;
10088 shift = lanebits - shift;
10089 vassert(shift >= 0 && shift < lanebits);
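      /* A concrete case, for illustration (relying on the right-shift-style
         value returned by getLaneInfo_IMMH_IMMB): immh:immb == 0001:010
         selects 8-bit lanes and gives shift == 16 - 10 == 6, so the left
         shift becomes lanebits - 6 == 2, matching the architectural
         encoding immh:immb - esize == 10 - 8 == 2. */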
10090 const HChar* nm = NULL;
10091 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10092 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10093 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10094 else vassert(0);
10095 IRTemp qDiff1 = IRTemp_INVALID;
10096 IRTemp qDiff2 = IRTemp_INVALID;
10097 IRTemp res = IRTemp_INVALID;
10098 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
10099 /* This relies on the fact that the zeroed out lanes generate zeroed
10100 result lanes and don't saturate, so there's no point in trimming
10101 the resulting res, qDiff1 or qDiff2 values. */
10102 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10103 putQReg128(dd, mkexpr(res));
10104 updateQCFLAGwithDifference(qDiff1, qDiff2);
10105 const HChar arr = "bhsd"[size];
10106 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
10107 return True;
10110 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10111 || (bitU == 1
10112 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10113 /* -------- 0,10010 SQSHRN #imm -------- */
10114 /* -------- 1,10010 UQSHRN #imm -------- */
10115 /* -------- 0,10011 SQRSHRN #imm -------- */
10116 /* -------- 1,10011 UQRSHRN #imm -------- */
10117 /* -------- 1,10000 SQSHRUN #imm -------- */
10118 /* -------- 1,10001 SQRSHRUN #imm -------- */
10119 UInt size = 0;
10120 UInt shift = 0;
10121 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10122 if (!ok || size == X11) return False;
10123 vassert(size >= X00 && size <= X10);
10124 vassert(shift >= 1 && shift <= (8 << size));
10125 const HChar* nm = "??";
10126 IROp op = Iop_INVALID;
10127 /* Decide on the name and the operation. */
10128 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10129 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10131 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10132 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10134 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10135 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10137 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10138 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10140 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10141 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10143 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10144 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10146 else vassert(0);
10147 /* Compute the result (Q, shifted value) pair. */
10148 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
10149 IRTemp pair = newTempV128();
10150 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10151 /* Update the result reg */
10152 IRTemp res64in128 = newTempV128();
10153 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10154 putQReg128(dd, mkexpr(res64in128));
10155 /* Update the Q flag. */
10156 IRTemp q64q64 = newTempV128();
10157 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10158 IRTemp z128 = newTempV128();
10159 assign(z128, mkV128(0x0000));
10160 updateQCFLAGwithDifference(q64q64, z128);
10161 /* */
10162 const HChar arrNarrow = "bhsd"[size];
10163 const HChar arrWide = "bhsd"[size+1];
10164 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
10165 return True;
10168 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
10169 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
10170 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
10171 UInt size = 0;
10172 UInt fbits = 0;
10173 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10174 /* The following holds because immh is never zero. */
10175 vassert(ok);
10176 /* The following holds because immh >= 0100. */
10177 vassert(size == X10 || size == X11);
10178 Bool isD = size == X11;
10179 Bool isU = bitU == 1;
10180 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10181 Double scale = two_to_the_minus(fbits);
10182 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10183 : IRExpr_Const(IRConst_F32( (Float)scale ));
10184 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10185 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10186 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10187 IRType tyF = isD ? Ity_F64 : Ity_F32;
10188 IRType tyI = isD ? Ity_I64 : Ity_I32;
10189 IRTemp src = newTemp(tyI);
10190 IRTemp res = newTemp(tyF);
10191 IRTemp rm = mk_get_IR_rounding_mode();
10192 assign(src, getQRegLane(nn, 0, tyI));
10193 assign(res, triop(opMUL, mkexpr(rm),
10194 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
10195 putQRegLane(dd, 0, mkexpr(res));
10196 if (!isD) {
10197 putQRegLane(dd, 1, mkU32(0));
10199 putQRegLane(dd, 1, mkU64(0));
10200 const HChar ch = isD ? 'd' : 's';
10201 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
10202 ch, dd, ch, nn, fbits);
10203 return True;
10206 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
10207 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
10208 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
10209 UInt size = 0;
10210 UInt fbits = 0;
10211 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10212 /* The following holds because immh is never zero. */
10213 vassert(ok);
10214 /* The following holds because immh >= 0100. */
10215 vassert(size == X10 || size == X11);
10216 Bool isD = size == X11;
10217 Bool isU = bitU == 1;
10218 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10219 Double scale = two_to_the_plus(fbits);
10220 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10221 : IRExpr_Const(IRConst_F32( (Float)scale ));
10222 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10223 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
10224 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
10225 IRType tyF = isD ? Ity_F64 : Ity_F32;
10226 IRType tyI = isD ? Ity_I64 : Ity_I32;
10227 IRTemp src = newTemp(tyF);
10228 IRTemp res = newTemp(tyI);
10229 IRTemp rm = newTemp(Ity_I32);
10230 assign(src, getQRegLane(nn, 0, tyF));
10231 assign(rm, mkU32(Irrm_ZERO));
10232 assign(res, binop(opCVT, mkexpr(rm),
10233 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
10234 putQRegLane(dd, 0, mkexpr(res));
10235 if (!isD) {
10236 putQRegLane(dd, 1, mkU32(0));
10238 putQRegLane(dd, 1, mkU64(0));
10239 const HChar ch = isD ? 'd' : 's';
10240 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
10241 ch, dd, ch, nn, fbits);
10242 return True;
10245 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10246 return False;
10247 # undef INSN
10251 static
10252 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
10254 /* 31 29 28 23 21 20 15 11 9 4
10255 01 U 11110 size 1 m opcode 00 n d
10256       Decode fields: u,opcode
10257    */
10258 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10259 if (INSN(31,30) != BITS2(0,1)
10260 || INSN(28,24) != BITS5(1,1,1,1,0)
10261 || INSN(21,21) != 1
10262 || INSN(11,10) != BITS2(0,0)) {
10263 return False;
10265 UInt bitU = INSN(29,29);
10266 UInt size = INSN(23,22);
10267 UInt mm = INSN(20,16);
10268 UInt opcode = INSN(15,12);
10269 UInt nn = INSN(9,5);
10270 UInt dd = INSN(4,0);
10271 vassert(size < 4);
10273 if (bitU == 0
10274 && (opcode == BITS4(1,1,0,1)
10275 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
10276 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
10277 /* -------- 0,1001 SQDMLAL -------- */ // 1
10278 /* -------- 0,1011 SQDMLSL -------- */ // 2
10279 /* Widens, and size refers to the narrowed lanes. */
10280 UInt ks = 3;
10281 switch (opcode) {
10282 case BITS4(1,1,0,1): ks = 0; break;
10283 case BITS4(1,0,0,1): ks = 1; break;
10284 case BITS4(1,0,1,1): ks = 2; break;
10285 default: vassert(0);
10287 vassert(ks >= 0 && ks <= 2);
10288 if (size == X00 || size == X11) return False;
10289 vassert(size <= 2);
10290 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
10291 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10292 newTempsV128_3(&vecN, &vecM, &vecD);
10293 assign(vecN, getQReg128(nn));
10294 assign(vecM, getQReg128(mm));
10295 assign(vecD, getQReg128(dd));
10296 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10297 False/*!is2*/, size, "mas"[ks],
10298 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10299 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10300 putQReg128(dd, unop(opZHI, mkexpr(res)));
10301 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10302 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10303 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10304 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10306 const HChar* nm = ks == 0 ? "sqdmull"
10307 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10308 const HChar arrNarrow = "bhsd"[size];
10309 const HChar arrWide = "bhsd"[size+1];
10310 DIP("%s %c%u, %c%u, %c%u\n",
10311 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
10312 return True;
10315 return False;
10316 # undef INSN
10320 static
10321 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
10323 /* 31 29 28 23 21 20 15 10 9 4
10324 01 U 11110 size 1 m opcode 1 n d
10325       Decode fields: u,size,opcode
10326    */
10327 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10328 if (INSN(31,30) != BITS2(0,1)
10329 || INSN(28,24) != BITS5(1,1,1,1,0)
10330 || INSN(21,21) != 1
10331 || INSN(10,10) != 1) {
10332 return False;
10334 UInt bitU = INSN(29,29);
10335 UInt size = INSN(23,22);
10336 UInt mm = INSN(20,16);
10337 UInt opcode = INSN(15,11);
10338 UInt nn = INSN(9,5);
10339 UInt dd = INSN(4,0);
10340 vassert(size < 4);
10342 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
10343 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
10344 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
10345 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
10346 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
10347 Bool isADD = opcode == BITS5(0,0,0,0,1);
10348 Bool isU = bitU == 1;
10349 IROp qop = Iop_INVALID;
10350 IROp nop = Iop_INVALID;
10351 if (isADD) {
10352 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
10353 nop = mkVecADD(size);
10354 } else {
10355 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
10356 nop = mkVecSUB(size);
10358 IRTemp argL = newTempV128();
10359 IRTemp argR = newTempV128();
10360 IRTemp qres = newTempV128();
10361 IRTemp nres = newTempV128();
10362 assign(argL, getQReg128(nn));
10363 assign(argR, getQReg128(mm));
10364 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10365 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
10366 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10367 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
10368 putQReg128(dd, mkexpr(qres));
10369 updateQCFLAGwithDifference(qres, nres);
10370 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
10371 : (isU ? "uqsub" : "sqsub");
10372 const HChar arr = "bhsd"[size];
10373 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10374 return True;
10377 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
10378 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
10379 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
10380 Bool isGT = bitU == 0;
10381 IRExpr* argL = getQReg128(nn);
10382 IRExpr* argR = getQReg128(mm);
10383 IRTemp res = newTempV128();
10384 assign(res,
10385 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10386 : binop(Iop_CmpGT64Ux2, argL, argR));
10387 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10388 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
10389 nameQRegLO(dd, Ity_I64),
10390 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10391 return True;
10394 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
10395 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
10396 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
10397 Bool isGE = bitU == 0;
10398 IRExpr* argL = getQReg128(nn);
10399 IRExpr* argR = getQReg128(mm);
10400 IRTemp res = newTempV128();
10401 assign(res,
10402 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
10403 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
10404 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10405 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
10406 nameQRegLO(dd, Ity_I64),
10407 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10408 return True;
10411 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
10412 || opcode == BITS5(0,1,0,1,0))) {
10413 /* -------- 0,xx,01000 SSHL d_d_d -------- */
10414 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
10415 /* -------- 1,xx,01000 USHL d_d_d -------- */
10416 /* -------- 1,xx,01010 URSHL d_d_d -------- */
10417 Bool isU = bitU == 1;
10418 Bool isR = opcode == BITS5(0,1,0,1,0);
10419 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
10420 : (isU ? mkVecSHU(size) : mkVecSHS(size));
10421 IRTemp res = newTempV128();
10422 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10423 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10424 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
10425 : (isU ? "ushl" : "sshl");
10426 DIP("%s %s, %s, %s\n", nm,
10427 nameQRegLO(dd, Ity_I64),
10428 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10429 return True;
10432 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
10433 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
10434 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
10435 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
10436 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
10437 Bool isU = bitU == 1;
10438 Bool isR = opcode == BITS5(0,1,0,1,1);
10439 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
10440 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
10441 /* This is a bit tricky. Since we're only interested in the lowest
10442 lane of the result, we zero out all the rest in the operands, so
10443 as to ensure that other lanes don't pollute the returned Q value.
10444 This works because it means, for the lanes we don't care about, we
10445 are shifting zero by zero, which can never saturate. */
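      /* These ops return a 256-bit pair: the lower 128 bits (V256toV128_0)
         hold the shifted result, and the upper 128 bits (V256toV128_1) are
         nonzero in any lane that saturated.  Comparing that upper half
         against zero is what drives the QC update below. */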
10446 IRTemp res256 = newTemp(Ity_V256);
10447 IRTemp resSH = newTempV128();
10448 IRTemp resQ = newTempV128();
10449 IRTemp zero = newTempV128();
10450 assign(
10451 res256,
10452 binop(op,
10453 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
10454 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
10455 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
10456 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
10457 assign(zero, mkV128(0x0000));
10458 putQReg128(dd, mkexpr(resSH));
10459 updateQCFLAGwithDifference(resQ, zero);
10460 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
10461 : (isU ? "uqshl" : "sqshl");
10462 const HChar arr = "bhsd"[size];
10463 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10464 return True;
10467 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
10468 /* -------- 0,11,10000 ADD d_d_d -------- */
10469 /* -------- 1,11,10000 SUB d_d_d -------- */
10470 Bool isSUB = bitU == 1;
10471 IRTemp res = newTemp(Ity_I64);
10472 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
10473 getQRegLane(nn, 0, Ity_I64),
10474 getQRegLane(mm, 0, Ity_I64)));
10475 putQRegLane(dd, 0, mkexpr(res));
10476 putQRegLane(dd, 1, mkU64(0));
10477 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
10478 nameQRegLO(dd, Ity_I64),
10479 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10480 return True;
10483 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
10484 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
10485 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
10486 Bool isEQ = bitU == 1;
10487 IRExpr* argL = getQReg128(nn);
10488 IRExpr* argR = getQReg128(mm);
10489 IRTemp res = newTempV128();
10490 assign(res,
10491 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10492 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
10493 binop(Iop_AndV128, argL, argR),
10494 mkV128(0x0000))));
10495 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10496 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
10497 nameQRegLO(dd, Ity_I64),
10498 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10499 return True;
10502 if (opcode == BITS5(1,0,1,1,0)) {
10503 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
10504 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
10505 if (size == X00 || size == X11) return False;
10506 Bool isR = bitU == 1;
10507 IRTemp res, sat1q, sat1n, vN, vM;
10508 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10509 newTempsV128_2(&vN, &vM);
10510 assign(vN, getQReg128(nn));
10511 assign(vM, getQReg128(mm));
10512 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10513 putQReg128(dd,
10514 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10515 updateQCFLAGwithDifference(
10516 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
10517 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
10518 const HChar arr = "bhsd"[size];
10519 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10520 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10521 return True;
10524 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
10525 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
10526 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
10527 IRTemp res = newTemp(ity);
10528 assign(res, unop(mkABSF(ity),
10529 triop(mkSUBF(ity),
10530 mkexpr(mk_get_IR_rounding_mode()),
10531 getQRegLO(nn,ity), getQRegLO(mm,ity))));
10532 putQReg128(dd, mkV128(0x0000));
10533 putQRegLO(dd, mkexpr(res));
10534 DIP("fabd %s, %s, %s\n",
10535 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10536 return True;
10539 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
10540 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
10541 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10542 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
10543 IRTemp res = newTemp(ity);
10544 assign(res, triop(mkMULF(ity),
10545 mkexpr(mk_get_IR_rounding_mode()),
10546 getQRegLO(nn,ity), getQRegLO(mm,ity)));
10547 putQReg128(dd, mkV128(0x0000));
10548 putQRegLO(dd, mkexpr(res));
10549 DIP("fmulx %s, %s, %s\n",
10550 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10551 return True;
10554 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
10555 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
10556 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
10557 Bool isD = size == X01;
10558 IRType ity = isD ? Ity_F64 : Ity_F32;
10559 Bool isGE = bitU == 1;
10560 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
10561 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
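      /* The GE case is computed as (m <= n), that is, the LE comparison
         with the operands swapped; hence CmpLE is selected above and the
         arguments are swapped below. */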
10562 IRTemp res = newTempV128();
10563 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
10564 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
10565 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10566 mkexpr(res))));
10567 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
10568 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10569 return True;
10572 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
10573 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
10574 Bool isD = size == X11;
10575 IRType ity = isD ? Ity_F64 : Ity_F32;
10576 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10577 IRTemp res = newTempV128();
10578 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
10579 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10580 mkexpr(res))));
10581 DIP("%s %s, %s, %s\n", "fcmgt",
10582 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10583 return True;
10586 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
10587 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
10588 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
10589 Bool isD = (size & 1) == 1;
10590 IRType ity = isD ? Ity_F64 : Ity_F32;
10591 Bool isGT = (size & 2) == 2;
10592 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
10593 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
10594 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
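      /* These compare absolute values: |n| > |m| (resp. |n| >= |m|) is
         computed as |m| < |n| (resp. |m| <= |n|), hence the abs of both
         operands and the operand swap below. */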
10595 IRTemp res = newTempV128();
10596 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
10597 unop(opABS, getQReg128(nn)))); // swapd
10598 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10599 mkexpr(res))));
10600 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
10601 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10602 return True;
10605 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
10606 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
10607 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
10608 Bool isSQRT = (size & 2) == 2;
10609 Bool isD = (size & 1) == 1;
10610 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
10611 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
10612 IRTemp res = newTempV128();
10613 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10614 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10615 mkexpr(res))));
10616 HChar c = isD ? 'd' : 's';
10617 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
10618 c, dd, c, nn, c, mm);
10619 return True;
10622 return False;
10623 # undef INSN
10626 static
10627 Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
10629 /* 31 29 28 23 21 20 15 10 9 4
10630 01 U 11110 size 0 m opcode 1 n d
10631 Decode fields: u,size,opcode
10633 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10634 if (INSN(31,30) != BITS2(0,1)
10635 || INSN(28,24) != BITS5(1,1,1,1,0)
10636 || INSN(21,21) != 0
10637 || INSN(10,10) != 1) {
10638 return False;
10640 UInt bitU = INSN(29,29);
10641 UInt size = INSN(23,22);
10642 UInt mm = INSN(20,16);
10643 UInt opcode = INSN(15,11);
10644 UInt nn = INSN(9,5);
10645 UInt dd = INSN(4,0);
10646 vassert(size < 4);
10647 vassert(mm < 32 && nn < 32 && dd < 32);
10649 if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10650 /* -------- xx,10000 SQRDMLAH s and h variants only -------- */
10651 /* -------- xx,10001 SQRDMLSH s and h variants only -------- */
10652 if (size == X00 || size == X11) return False;
10653 Bool isAdd = opcode == BITS5(1,0,0,0,0);
10655 IRTemp res, res_nosat, vD, vN, vM;
10656 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
10657 newTempsV128_3(&vD, &vN, &vM);
10658 assign(vD, getQReg128(dd));
10659 assign(vN, getQReg128(nn));
10660 assign(vM, getQReg128(mm));
10662 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
10663 putQReg128(dd,
10664 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10665 updateQCFLAGwithDifference(
10666 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)),
10667 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat)));
10670 const HChar arr = "bhsd"[size];
10670 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
10671 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10672 return True;
10675 return False;
10676 # undef INSN
10680 static
10681 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
10683 /* 31 29 28 23 21 16 11 9 4
10684 01 U 11110 size 10000 opcode 10 n d
10685 Decode fields: u,size,opcode
10687 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10688 if (INSN(31,30) != BITS2(0,1)
10689 || INSN(28,24) != BITS5(1,1,1,1,0)
10690 || INSN(21,17) != BITS5(1,0,0,0,0)
10691 || INSN(11,10) != BITS2(1,0)) {
10692 return False;
10694 UInt bitU = INSN(29,29);
10695 UInt size = INSN(23,22);
10696 UInt opcode = INSN(16,12);
10697 UInt nn = INSN(9,5);
10698 UInt dd = INSN(4,0);
10699 vassert(size < 4);
10701 if (opcode == BITS5(0,0,0,1,1)) {
10702 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
10703 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
10704 /* These are a bit tricky (to say the least). See comments on
10705 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
10706 details. */
10707 Bool isUSQADD = bitU == 1;
10708 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
10709 : mkVecQADDEXTUSSATSS(size);
10710 IROp nop = mkVecADD(size);
10711 IRTemp argL = newTempV128();
10712 IRTemp argR = newTempV128();
10713 assign(argL, getQReg128(nn));
10714 assign(argR, getQReg128(dd));
10715 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10716 size, binop(qop, mkexpr(argL), mkexpr(argR)));
10717 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10718 size, binop(nop, mkexpr(argL), mkexpr(argR)));
10719 putQReg128(dd, mkexpr(qres));
10720 updateQCFLAGwithDifference(qres, nres);
10721 const HChar arr = "bhsd"[size];
10722 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
10723 return True;
10726 if (opcode == BITS5(0,0,1,1,1)) {
10727 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
10728 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
10729 Bool isNEG = bitU == 1;
10730 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
10731 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
10732 getQReg128(nn), size );
10733 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
10734 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
10735 putQReg128(dd, mkexpr(qres));
10736 updateQCFLAGwithDifference(qres, nres);
10737 const HChar arr = "bhsd"[size];
10738 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
10739 return True;
10742 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
10743 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
10744 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
10745 Bool isGT = bitU == 0;
10746 IRExpr* argL = getQReg128(nn);
10747 IRExpr* argR = mkV128(0x0000);
10748 IRTemp res = newTempV128();
10749 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10750 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
10751 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10752 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
10753 return True;
10756 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
10757 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
10758 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
10759 Bool isEQ = bitU == 0;
10760 IRExpr* argL = getQReg128(nn);
10761 IRExpr* argR = mkV128(0x0000);
10762 IRTemp res = newTempV128();
10763 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10764 : unop(Iop_NotV128,
10765 binop(Iop_CmpGT64Sx2, argL, argR)));
10766 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10767 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
10768 return True;
10771 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
10772 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
10773 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10774 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
10775 getQReg128(nn))));
10776 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
10777 return True;
10780 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10781 /* -------- 0,11,01011 ABS d_d -------- */
10782 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10783 unop(Iop_Abs64x2, getQReg128(nn))));
10784 DIP("abs d%u, d%u\n", dd, nn);
10785 return True;
10788 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10789 /* -------- 1,11,01011 NEG d_d -------- */
10790 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10791 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
10792 DIP("neg d%u, d%u\n", dd, nn);
10793 return True;
10796 UInt ix = 0; /*INVALID*/
10797 if (size >= X10) {
10798 switch (opcode) {
10799 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
10800 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
10801 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
10802 default: break;
10805 if (ix > 0) {
10806 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
10807 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
10808 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
10809 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
10810 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
10811 Bool isD = size == X11;
10812 IRType ity = isD ? Ity_F64 : Ity_F32;
10813 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
10814 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
10815 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10816 IROp opCmp = Iop_INVALID;
10817 Bool swap = False;
10818 const HChar* nm = "??";
10819 switch (ix) {
10820 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
10821 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
10822 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
10823 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
10824 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
10825 default: vassert(0);
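      /* The greater-than/greater-or-equal forms are obtained by swapping
         the operands of the LT/LE comparison against zero: for example
         FCMGT n, #0.0 is computed as (0 < n). */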
10827 IRExpr* zero = mkV128(0x0000);
10828 IRTemp res = newTempV128();
10829 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
10830 : binop(opCmp, getQReg128(nn), zero));
10831 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10832 mkexpr(res))));
10834 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10835 return True;
10838 if (opcode == BITS5(1,0,1,0,0)
10839 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10840 /* -------- 0,xx,10100: SQXTN -------- */
10841 /* -------- 1,xx,10100: UQXTN -------- */
10842 /* -------- 1,xx,10010: SQXTUN -------- */
10843 if (size == X11) return False;
10844 vassert(size < 3);
10845 IROp opN = Iop_INVALID;
10846 Bool zWiden = True;
10847 const HChar* nm = "??";
10848 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10849 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10851 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10852 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10854 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10855 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10857 else vassert(0);
10858 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10859 size+1, getQReg128(nn));
10860 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10861 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10862 putQReg128(dd, mkexpr(resN));
10863 /* Saturation is detected by re-widening the narrowed result and
10864 comparing it with the original source; the non-participating lanes
10865 are zero in both, so they make no contribution to the Q flag state. */
10866 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10867 size, mkexpr(resN));
10868 updateQCFLAGwithDifference(src, resW);
10869 const HChar arrNarrow = "bhsd"[size];
10870 const HChar arrWide = "bhsd"[size+1];
10871 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10872 return True;
10875 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10876 /* -------- 1,01,10110 FCVTXN s_d -------- */
10877 /* Using Irrm_NEAREST here isn't right: FCVTXN requires "round to odd",
10878 which forces the result's low bit to 1 whenever the conversion is inexact, so that a later narrowing cannot double-round. */
10879 putQRegLO(dd,
10880 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10881 getQRegLO(nn, Ity_F64)));
10882 putQRegLane(dd, 1, mkU32(0));
10883 putQRegLane(dd, 1, mkU64(0));
10884 DIP("fcvtxn s%u, d%u\n", dd, nn);
10885 return True;
10888 ix = 0; /*INVALID*/
10889 switch (opcode) {
10890 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10891 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10892 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10893 default: break;
10895 if (ix > 0) {
10896 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10897 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10898 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10899 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10900 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10901 /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10902 /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10903 /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10904 /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10905 /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10906 Bool isD = (size & 1) == 1;
10907 IRType tyF = isD ? Ity_F64 : Ity_F32;
10908 IRType tyI = isD ? Ity_I64 : Ity_I32;
10909 IRRoundingMode irrm = 8; /*impossible*/
10910 HChar ch = '?';
10911 switch (ix) {
10912 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10913 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10914 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
10915 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10916 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10917 default: vassert(0);
10919 IROp cvt = Iop_INVALID;
10920 if (bitU == 1) {
10921 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10922 } else {
10923 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10925 IRTemp src = newTemp(tyF);
10926 IRTemp res = newTemp(tyI);
10927 assign(src, getQRegLane(nn, 0, tyF));
10928 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10929 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10930 if (!isD) {
10931 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10933 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10934 HChar sOrD = isD ? 'd' : 's';
10935 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10936 sOrD, dd, sOrD, nn);
10937 return True;
10940 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10941 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10942 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10943 Bool isU = bitU == 1;
10944 Bool isD = (size & 1) == 1;
10945 IRType tyI = isD ? Ity_I64 : Ity_I32;
10946 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10947 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10948 IRTemp rm = mk_get_IR_rounding_mode();
10949 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10950 if (!isD) {
10951 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10953 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10954 HChar c = isD ? 'd' : 's';
10955 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10956 return True;
10959 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10960 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10961 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10962 Bool isSQRT = bitU == 1;
10963 Bool isD = (size & 1) == 1;
10964 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10965 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10966 IRTemp resV = newTempV128();
10967 assign(resV, unop(op, getQReg128(nn)));
10968 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10969 mkexpr(resV))));
10970 HChar c = isD ? 'd' : 's';
10971 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10972 return True;
10975 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10976 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10977 Bool isD = (size & 1) == 1;
10978 IRType ty = isD ? Ity_F64 : Ity_F32;
10979 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10980 IRTemp res = newTemp(ty);
10981 IRTemp rm = mk_get_IR_rounding_mode();
10982 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10983 putQReg128(dd, mkV128(0x0000));
10984 putQRegLane(dd, 0, mkexpr(res));
10985 HChar c = isD ? 'd' : 's';
10986 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10987 return True;
10990 return False;
10991 # undef INSN
10995 static
10996 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10998 /* 31 28 23 21 20 19 15 11 9 4
10999 01 U 11111 size L M m opcode H 0 n d
11000 Decode fields are: u,size,opcode
11001 M is really part of the mm register number. Individual
11002 cases need to inspect L and H though.
11004 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11005 if (INSN(31,30) != BITS2(0,1)
11006 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
11007 return False;
11009 UInt bitU = INSN(29,29);
11010 UInt size = INSN(23,22);
11011 UInt bitL = INSN(21,21);
11012 UInt bitM = INSN(20,20);
11013 UInt mmLO4 = INSN(19,16);
11014 UInt opcode = INSN(15,12);
11015 UInt bitH = INSN(11,11);
11016 UInt nn = INSN(9,5);
11017 UInt dd = INSN(4,0);
11018 vassert(size < 4);
11019 vassert(bitH < 2 && bitM < 2 && bitL < 2);
11021 if (bitU == 0 && size >= X10
11022 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
11023 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
11024 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
11025 Bool isD = (size & 1) == 1;
11026 Bool isSUB = opcode == BITS4(0,1,0,1);
11027 UInt index;
11028 if (!isD) index = (bitH << 1) | bitL;
11029 else if (isD && bitL == 0) index = bitH;
11030 else return False; // sz:L == x11 => unallocated encoding
11031 vassert(index < (isD ? 2 : 4));
11032 IRType ity = isD ? Ity_F64 : Ity_F32;
11033 IRTemp elem = newTemp(ity);
11034 UInt mm = (bitM << 4) | mmLO4;
11035 assign(elem, getQRegLane(mm, index, ity));
11036 IRTemp dupd = math_DUP_TO_V128(elem, ity);
11037 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
11038 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11039 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11040 IRTemp rm = mk_get_IR_rounding_mode();
11041 IRTemp t1 = newTempV128();
11042 IRTemp t2 = newTempV128();
11043 // FIXME: double rounding; use FMA primops instead
11044 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
11045 assign(t2, triop(isSUB ? opSUB : opADD,
11046 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
11047 putQReg128(dd,
11048 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
11049 mkexpr(t2))));
11050 const HChar c = isD ? 'd' : 's';
11051 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
11052 c, dd, c, nn, nameQReg128(mm), c, index);
11053 return True;
11056 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
11057 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
11058 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
11059 Bool isD = (size & 1) == 1;
11060 Bool isMULX = bitU == 1;
11061 UInt index;
11062 if (!isD) index = (bitH << 1) | bitL;
11063 else if (isD && bitL == 0) index = bitH;
11064 else return False; // sz:L == x11 => unallocated encoding
11065 vassert(index < (isD ? 2 : 4));
11066 IRType ity = isD ? Ity_F64 : Ity_F32;
11067 IRTemp elem = newTemp(ity);
11068 UInt mm = (bitM << 4) | mmLO4;
11069 assign(elem, getQRegLane(mm, index, ity));
11070 IRTemp dupd = math_DUP_TO_V128(elem, ity);
11071 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11072 IRTemp rm = mk_get_IR_rounding_mode();
11073 IRTemp t1 = newTempV128();
11074 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
11075 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
11076 putQReg128(dd,
11077 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
11078 mkexpr(t1))));
11079 const HChar c = isD ? 'd' : 's';
11080 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
11081 c, dd, c, nn, nameQReg128(mm), c, index);
11082 return True;
11085 if (bitU == 0
11086 && (opcode == BITS4(1,0,1,1)
11087 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
11088 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
11089 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
11090 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
11091 /* Widens, and size refers to the narrowed lanes. */
11092 UInt ks = 3;
11093 switch (opcode) {
11094 case BITS4(1,0,1,1): ks = 0; break;
11095 case BITS4(0,0,1,1): ks = 1; break;
11096 case BITS4(0,1,1,1): ks = 2; break;
11097 default: vassert(0);
11099 vassert(ks >= 0 && ks <= 2);
11100 UInt mm = 32; // invalid
11101 UInt ix = 16; // invalid
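      /* Reconstruct the Vm register number and the element index from
         M, L, H and mmLO4.  For H-sized elements the index is H:L:M and
         only mmLO4 names the register; for S-sized elements the index is
         H:L and M supplies bit 4 of the register number. */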
11102 switch (size) {
11103 case X00:
11104 return False; // h_b_b[] case is not allowed
11105 case X01:
11106 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11107 case X10:
11108 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11109 case X11:
11110 return False; // q_d_d[] case is not allowed
11111 default:
11112 vassert(0);
11114 vassert(mm < 32 && ix < 16);
11115 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
11116 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11117 newTempsV128_2(&vecN, &vecD);
11118 assign(vecN, getQReg128(nn));
11119 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11120 assign(vecD, getQReg128(dd));
11121 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11122 False/*!is2*/, size, "mas"[ks],
11123 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11124 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
11125 putQReg128(dd, unop(opZHI, mkexpr(res)));
11126 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11127 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11128 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11129 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
11131 const HChar* nm = ks == 0 ? "sqdmull"
11132 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11133 const HChar arrNarrow = "bhsd"[size];
11134 const HChar arrWide = "bhsd"[size+1];
11135 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
11136 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
11137 return True;
11140 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
11141 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
11142 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
11143 UInt mm = 32; // invalid
11144 UInt ix = 16; // invalid
11145 switch (size) {
11146 case X00:
11147 return False; // b case is not allowed
11148 case X01:
11149 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11150 case X10:
11151 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11152 case X11:
11153 return False; // q case is not allowed
11154 default:
11155 vassert(0);
11157 vassert(mm < 32 && ix < 16);
11158 Bool isR = opcode == BITS4(1,1,0,1);
11159 IRTemp res, sat1q, sat1n, vN, vM;
11160 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11161 vN = newTempV128();
11162 assign(vN, getQReg128(nn));
11163 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11164 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11165 IROp opZHI = mkVecZEROHIxxOFV128(size);
11166 putQReg128(dd, unop(opZHI, mkexpr(res)));
11167 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11168 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11169 HChar ch = size == X01 ? 'h' : 's';
11170 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, (Int)dd, ix);
11171 return True;
11174 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
11175 /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
11176 /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
11177 UInt mm = 32; // invalid
11178 UInt ix = 16; // invalid
11179 switch (size) {
11180 case X00:
11181 return False; // b case is not allowed
11182 case X01:
11183 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11184 case X10:
11185 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11186 case X11:
11187 return False; // d case is not allowed
11188 default:
11189 vassert(0);
11191 vassert(size < 4);
11192 vassert(mm < 32 && ix < 16);
11193 Bool isAdd = opcode == BITS4(1,1,0,1);
11195 IRTemp res, res_nosat, vD, vN, vM;
11196 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
11197 newTempsV128_2(&vD, &vN);
11198 assign(vD, getQReg128(dd));
11199 assign(vN, getQReg128(nn));
11200 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11202 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
11203 IROp opZHI = mkVecZEROHIxxOFV128(size);
11204 putQReg128(dd, unop(opZHI, mkexpr(res)));
11205 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
11207 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
11208 HChar ch = size == X01 ? 'h' : 's';
11209 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, (Int)dd, ix);
11210 return True;
11213 return False;
11214 # undef INSN
11218 static
11219 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
11221 /* 31 28 22 18 15 10 9 4
11222 0 q u 011110 immh immb opcode 1 n d
11223 Decode fields: u,opcode
11225 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11226 if (INSN(31,31) != 0
11227 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
11228 return False;
11230 UInt bitQ = INSN(30,30);
11231 UInt bitU = INSN(29,29);
11232 UInt immh = INSN(22,19);
11233 UInt immb = INSN(18,16);
11234 UInt opcode = INSN(15,11);
11235 UInt nn = INSN(9,5);
11236 UInt dd = INSN(4,0);
11238 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
11239 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
11240 /* -------- 1,00000 USHR std7_std7_#imm -------- */
11241 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
11242 /* -------- 1,00010 USRA std7_std7_#imm -------- */
11243 /* laneTy, shift = case immh:immb of
11244 0001:xxx -> B, SHR:8-xxx
11245 001x:xxx -> H, SHR:16-xxxx
11246 01xx:xxx -> S, SHR:32-xxxxx
11247 1xxx:xxx -> D, SHR:64-xxxxxx
11248 other -> invalid
11250 UInt size = 0;
11251 UInt shift = 0;
11252 Bool isQ = bitQ == 1;
11253 Bool isU = bitU == 1;
11254 Bool isAcc = opcode == BITS5(0,0,0,1,0);
11255 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11256 if (!ok || (bitQ == 0 && size == X11)) return False;
11257 vassert(size >= 0 && size <= 3);
11258 UInt lanebits = 8 << size;
11259 vassert(shift >= 1 && shift <= lanebits);
11260 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
11261 IRExpr* src = getQReg128(nn);
11262 IRTemp shf = newTempV128();
11263 IRTemp res = newTempV128();
11264 if (shift == lanebits && isU) {
11265 assign(shf, mkV128(0x0000));
11266 } else {
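            /* A signed (arithmetic) shift by the full lane width is not a
               valid IR shift amount, but shifting by lanebits-1 gives the
               same all-sign-bits result, so nudge the amount down by one
               in that case. */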
11267 UInt nudge = 0;
11268 if (shift == lanebits) {
11269 vassert(!isU);
11270 nudge = 1;
11272 assign(shf, binop(op, src, mkU8(shift - nudge)));
11274 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
11275 : mkexpr(shf));
11276 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11277 HChar laneCh = "bhsd"[size];
11278 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11279 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
11280 : (isU ? "ushr" : "sshr");
11281 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11282 nameQReg128(dd), nLanes, laneCh,
11283 nameQReg128(nn), nLanes, laneCh, shift);
11284 return True;
11287 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
11288 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
11289 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
11290 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
11291 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
11292 /* laneTy, shift = case immh:immb of
11293 0001:xxx -> B, SHR:8-xxx
11294 001x:xxx -> H, SHR:16-xxxx
11295 01xx:xxx -> S, SHR:32-xxxxx
11296 1xxx:xxx -> D, SHR:64-xxxxxx
11297 other -> invalid
11299 UInt size = 0;
11300 UInt shift = 0;
11301 Bool isQ = bitQ == 1;
11302 Bool isU = bitU == 1;
11303 Bool isAcc = opcode == BITS5(0,0,1,1,0);
11304 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11305 if (!ok || (bitQ == 0 && size == X11)) return False;
11306 vassert(size >= 0 && size <= 3);
11307 UInt lanebits = 8 << size;
11308 vassert(shift >= 1 && shift <= lanebits);
11309 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
11310 IRExpr* src = getQReg128(nn);
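      /* The rounding-shift ops take a per-lane signed shift count, with a
         negative count meaning a shift right (as for ARM's SSHL/USHL
         family), so -shift is duplicated across the vector. */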
11311 IRTemp imm8 = newTemp(Ity_I8);
11312 assign(imm8, mkU8((UChar)(-shift)));
11313 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
11314 IRTemp shf = newTempV128();
11315 IRTemp res = newTempV128();
11316 assign(shf, binop(op, src, amt));
11317 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
11318 : mkexpr(shf));
11319 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11320 HChar laneCh = "bhsd"[size];
11321 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11322 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
11323 : (isU ? "urshr" : "srshr");
11324 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11325 nameQReg128(dd), nLanes, laneCh,
11326 nameQReg128(nn), nLanes, laneCh, shift);
11327 return True;
11330 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
11331 /* -------- 1,01000 SRI std7_std7_#imm -------- */
11332 /* laneTy, shift = case immh:immb of
11333 0001:xxx -> B, SHR:8-xxx
11334 001x:xxx -> H, SHR:16-xxxx
11335 01xx:xxx -> S, SHR:32-xxxxx
11336 1xxx:xxx -> D, SHR:64-xxxxxx
11337 other -> invalid
11339 UInt size = 0;
11340 UInt shift = 0;
11341 Bool isQ = bitQ == 1;
11342 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11343 if (!ok || (bitQ == 0 && size == X11)) return False;
11344 vassert(size >= 0 && size <= 3);
11345 UInt lanebits = 8 << size;
11346 vassert(shift >= 1 && shift <= lanebits);
11347 IRExpr* src = getQReg128(nn);
11348 IRTemp res = newTempV128();
11349 if (shift == lanebits) {
11350 assign(res, getQReg128(dd));
11351 } else {
11352 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
11353 IRExpr* nmask = binop(mkVecSHLN(size),
11354 mkV128(0xFFFF), mkU8(lanebits - shift));
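         /* nmask selects the top 'shift' bits of each lane.  Those bits are
            kept from the existing destination, while the remaining bits come
            from the right-shifted source, giving the "shift right and
            insert" behaviour. */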
11355 IRTemp tmp = newTempV128();
11356 assign(tmp, binop(Iop_OrV128,
11357 mkexpr(res),
11358 binop(Iop_AndV128, getQReg128(dd), nmask)));
11359 res = tmp;
11361 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11362 HChar laneCh = "bhsd"[size];
11363 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11364 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
11365 nameQReg128(dd), nLanes, laneCh,
11366 nameQReg128(nn), nLanes, laneCh, shift);
11367 return True;
11370 if (opcode == BITS5(0,1,0,1,0)) {
11371 /* -------- 0,01010 SHL std7_std7_#imm -------- */
11372 /* -------- 1,01010 SLI std7_std7_#imm -------- */
11373 /* laneTy, shift = case immh:immb of
11374 0001:xxx -> B, xxx
11375 001x:xxx -> H, xxxx
11376 01xx:xxx -> S, xxxxx
11377 1xxx:xxx -> D, xxxxxx
11378 other -> invalid
11380 UInt size = 0;
11381 UInt shift = 0;
11382 Bool isSLI = bitU == 1;
11383 Bool isQ = bitQ == 1;
11384 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11385 if (!ok || (bitQ == 0 && size == X11)) return False;
11386 vassert(size >= 0 && size <= 3);
11387 /* The shift encoding has opposite sign for the leftwards case.
11388 Adjust shift to compensate. */
11389 UInt lanebits = 8 << size;
11390 shift = lanebits - shift;
11391 vassert(shift >= 0 && shift < lanebits);
11392 IROp op = mkVecSHLN(size);
11393 IRExpr* src = getQReg128(nn);
11394 IRTemp res = newTempV128();
11395 if (shift == 0) {
11396 assign(res, src);
11397 } else {
11398 assign(res, binop(op, src, mkU8(shift)));
11399 if (isSLI) {
11400 IRExpr* nmask = binop(mkVecSHRN(size),
11401 mkV128(0xFFFF), mkU8(lanebits - shift));
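            /* nmask selects the low 'shift' bits of each lane, which are
               kept from the existing destination; the rest come from the
               left-shifted source, giving the "shift left and insert"
               behaviour. */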
11402 IRTemp tmp = newTempV128();
11403 assign(tmp, binop(Iop_OrV128,
11404 mkexpr(res),
11405 binop(Iop_AndV128, getQReg128(dd), nmask)));
11406 res = tmp;
11409 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11410 HChar laneCh = "bhsd"[size];
11411 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11412 const HChar* nm = isSLI ? "sli" : "shl";
11413 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11414 nameQReg128(dd), nLanes, laneCh,
11415 nameQReg128(nn), nLanes, laneCh, shift);
11416 return True;
11419 if (opcode == BITS5(0,1,1,1,0)
11420 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
11421 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
11422 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
11423 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
11424 UInt size = 0;
11425 UInt shift = 0;
11426 Bool isQ = bitQ == 1;
11427 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11428 if (!ok || (bitQ == 0 && size == X11)) return False;
11429 vassert(size >= 0 && size <= 3);
11430 /* The shift encoding has opposite sign for the leftwards case.
11431 Adjust shift to compensate. */
11432 UInt lanebits = 8 << size;
11433 shift = lanebits - shift;
11434 vassert(shift >= 0 && shift < lanebits);
11435 const HChar* nm = NULL;
11436 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
11437 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
11438 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
11439 else vassert(0);
11440 IRTemp qDiff1 = IRTemp_INVALID;
11441 IRTemp qDiff2 = IRTemp_INVALID;
11442 IRTemp res = IRTemp_INVALID;
11443 IRTemp src = newTempV128();
11444 assign(src, getQReg128(nn));
11445 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
11446 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11447 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
11448 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
11449 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11450 DIP("%s %s.%s, %s.%s, #%u\n", nm,
11451 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
11452 return True;
11455 if (bitU == 0
11456 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
11457 /* -------- 0,10000 SHRN{,2} #imm -------- */
11458 /* -------- 0,10001 RSHRN{,2} #imm -------- */
11459 /* Narrows, and size is the narrow size. */
11460 UInt size = 0;
11461 UInt shift = 0;
11462 Bool is2 = bitQ == 1;
11463 Bool isR = opcode == BITS5(1,0,0,0,1);
11464 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11465 if (!ok || size == X11) return False;
11466 vassert(shift >= 1);
11467 IRTemp t1 = newTempV128();
11468 IRTemp t2 = newTempV128();
11469 IRTemp t3 = newTempV128();
11470 assign(t1, getQReg128(nn));
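      /* For the rounding variant, add the rounding bias 1 << (shift-1),
         i.e. half the weight of the lowest retained bit, to each wide lane
         before shifting right. */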
11471 assign(t2, isR ? binop(mkVecADD(size+1),
11472 mkexpr(t1),
11473 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
11474 : mkexpr(t1));
11475 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
11476 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
11477 putLO64andZUorPutHI64(is2, dd, t4);
11478 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11479 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11480 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
11481 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
11482 return True;
11485 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
11486 || (bitU == 1
11487 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
11488 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
11489 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
11490 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
11491 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
11492 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
11493 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
11494 UInt size = 0;
11495 UInt shift = 0;
11496 Bool is2 = bitQ == 1;
11497 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11498 if (!ok || size == X11) return False;
11499 vassert(shift >= 1 && shift <= (8 << size));
11500 const HChar* nm = "??";
11501 IROp op = Iop_INVALID;
11502 /* Decide on the name and the operation. */
11503 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
11504 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
11506 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
11507 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
11509 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
11510 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
11512 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
11513 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
11515 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
11516 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
11518 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
11519 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
11521 else vassert(0);
11522 /* Compute the result (Q, shifted value) pair. */
11523 IRTemp src128 = newTempV128();
11524 assign(src128, getQReg128(nn));
11525 IRTemp pair = newTempV128();
11526 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
11527 /* Update the result reg */
11528 IRTemp res64in128 = newTempV128();
11529 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
11530 putLO64andZUorPutHI64(is2, dd, res64in128);
11531 /* Update the Q flag. */
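      /* The saturating narrowing shift leaves its result in the lower 64
         bits of 'pair' and per-lane saturation indications in the upper 64
         bits; duplicating that upper half and comparing it with zero feeds
         the QC flag. */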
11532 IRTemp q64q64 = newTempV128();
11533 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
11534 IRTemp z128 = newTempV128();
11535 assign(z128, mkV128(0x0000));
11536 updateQCFLAGwithDifference(q64q64, z128);
11537 /* */
11538 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11539 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11540 DIP("%s %s.%s, %s.%s, #%u\n", nm,
11541 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
11542 return True;
11545 if (opcode == BITS5(1,0,1,0,0)) {
11546 /* -------- 0,10100 SSHLL{,2} #imm -------- */
11547 /* -------- 1,10100 USHLL{,2} #imm -------- */
11548 /* 31 28 22 18 15 9 4
11549 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
11550 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
11551 where Ta,Tb,sh
11552 = case immh of 1xxx -> invalid
11553 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
11554 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
11555 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
11556 0000 -> AdvSIMD modified immediate (???)
11558 Bool isQ = bitQ == 1;
11559 Bool isU = bitU == 1;
11560 UInt immhb = (immh << 3) | immb;
11561 IRTemp src = newTempV128();
11562 IRTemp zero = newTempV128();
11563 IRExpr* res = NULL;
11564 UInt sh = 0;
11565 const HChar* ta = "??";
11566 const HChar* tb = "??";
11567 assign(src, getQReg128(nn));
11568 assign(zero, mkV128(0x0000));
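      /* The widening left shift is done by interleaving the source with
         zero, which puts each narrow lane into the top half of a wide lane,
         and then shifting right (arithmetically for the signed case) by
         (narrow lane width minus sh). */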
11569 if (immh & 8) {
11570 /* invalid; don't assign to res */
11572 else if (immh & 4) {
11573 sh = immhb - 32;
11574 vassert(sh < 32); /* so 32-sh is 1..32 */
11575 ta = "2d";
11576 tb = isQ ? "4s" : "2s";
11577 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
11578 : mk_InterleaveLO32x4(src, zero);
11579 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
11581 else if (immh & 2) {
11582 sh = immhb - 16;
11583 vassert(sh < 16); /* so 16-sh is 1..16 */
11584 ta = "4s";
11585 tb = isQ ? "8h" : "4h";
11586 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
11587 : mk_InterleaveLO16x8(src, zero);
11588 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
11590 else if (immh & 1) {
11591 sh = immhb - 8;
11592 vassert(sh < 8); /* so 8-sh is 1..8 */
11593 ta = "8h";
11594 tb = isQ ? "16b" : "8b";
11595 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
11596 : mk_InterleaveLO8x16(src, zero);
11597 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
11598 } else {
11599 vassert(immh == 0);
11600 /* invalid; don't assign to res */
11602 /* */
11603 if (res) {
11604 putQReg128(dd, res);
11605 DIP("%cshll%s %s.%s, %s.%s, #%u\n",
11606 isU ? 'u' : 's', isQ ? "2" : "",
11607 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
11608 return True;
11610 return False;
11613 if (opcode == BITS5(1,1,1,0,0)) {
11614 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11615 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11616 /* If immh is of the form 00xx, the insn is invalid. */
11617 if (immh < BITS4(0,1,0,0)) return False;
11618 UInt size = 0;
11619 UInt fbits = 0;
11620 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11621 /* The following holds because immh is never zero. */
11622 vassert(ok);
11623 /* The following holds because immh >= 0100. */
11624 vassert(size == X10 || size == X11);
11625 Bool isD = size == X11;
11626 Bool isU = bitU == 1;
11627 Bool isQ = bitQ == 1;
11628 if (isD && !isQ) return False; /* reject .1d case */
11629 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11630 Double scale = two_to_the_minus(fbits);
11631 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11632 : IRExpr_Const(IRConst_F32( (Float)scale ));
11633 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11634 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
11635 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
11636 IRType tyF = isD ? Ity_F64 : Ity_F32;
11637 IRType tyI = isD ? Ity_I64 : Ity_I32;
11638 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11639 vassert(nLanes == 2 || nLanes == 4);
11640 for (UInt i = 0; i < nLanes; i++) {
11641 IRTemp src = newTemp(tyI);
11642 IRTemp res = newTemp(tyF);
11643 IRTemp rm = mk_get_IR_rounding_mode();
11644 assign(src, getQRegLane(nn, i, tyI));
11645 assign(res, triop(opMUL, mkexpr(rm),
11646 binop(opCVT, mkexpr(rm), mkexpr(src)),
11647 scaleE));
11648 putQRegLane(dd, i, mkexpr(res));
11650 if (!isQ) {
11651 putQRegLane(dd, 1, mkU64(0));
11653 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11654 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
11655 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11656 return True;
11659 if (opcode == BITS5(1,1,1,1,1)) {
11660 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
11661 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
11662 /* If immh is of the form 00xx, the insn is invalid. */
11663 if (immh < BITS4(0,1,0,0)) return False;
11664 UInt size = 0;
11665 UInt fbits = 0;
11666 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11667 /* The following holds because immh is never zero. */
11668 vassert(ok);
11669 /* The following holds because immh >= 0100. */
11670 vassert(size == X10 || size == X11);
11671 Bool isD = size == X11;
11672 Bool isU = bitU == 1;
11673 Bool isQ = bitQ == 1;
11674 if (isD && !isQ) return False; /* reject .1d case */
11675 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11676 Double scale = two_to_the_plus(fbits);
11677 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11678 : IRExpr_Const(IRConst_F32( (Float)scale ));
11679 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11680 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
11681 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
11682 IRType tyF = isD ? Ity_F64 : Ity_F32;
11683 IRType tyI = isD ? Ity_I64 : Ity_I32;
11684 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11685 vassert(nLanes == 2 || nLanes == 4);
11686 for (UInt i = 0; i < nLanes; i++) {
11687 IRTemp src = newTemp(tyF);
11688 IRTemp res = newTemp(tyI);
11689 IRTemp rm = newTemp(Ity_I32);
11690 assign(src, getQRegLane(nn, i, tyF));
11691 assign(rm, mkU32(Irrm_ZERO));
11692 assign(res, binop(opCVT, mkexpr(rm),
11693 triop(opMUL, mkexpr(rm),
11694 mkexpr(src), scaleE)));
11695 putQRegLane(dd, i, mkexpr(res));
11697 if (!isQ) {
11698 putQRegLane(dd, 1, mkU64(0));
11700 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11701 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
11702 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11703 return True;
11707 return False;
11708 # undef INSN
11712 static
11713 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
11715 /* 31 30 29 28 23 21 20 15 11 9 4
11716 0 Q U 01110 size 1 m opcode 00 n d
11717 Decode fields: u,opcode
11719 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11720 if (INSN(31,31) != 0
11721 || INSN(28,24) != BITS5(0,1,1,1,0)
11722 || INSN(21,21) != 1
11723 || INSN(11,10) != BITS2(0,0)) {
11724 return False;
11726 UInt bitQ = INSN(30,30);
11727 UInt bitU = INSN(29,29);
11728 UInt size = INSN(23,22);
11729 UInt mm = INSN(20,16);
11730 UInt opcode = INSN(15,12);
11731 UInt nn = INSN(9,5);
11732 UInt dd = INSN(4,0);
11733 vassert(size < 4);
11734 Bool is2 = bitQ == 1;
11736 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
11737 /* -------- 0,0000 SADDL{2} -------- */
11738 /* -------- 1,0000 UADDL{2} -------- */
11739 /* -------- 0,0010 SSUBL{2} -------- */
11740 /* -------- 1,0010 USUBL{2} -------- */
11741 /* Widens, and size refers to the narrow lanes. */
11742 if (size == X11) return False;
11743 vassert(size <= 2);
11744 Bool isU = bitU == 1;
11745 Bool isADD = opcode == BITS4(0,0,0,0);
11746 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11747 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11748 IRTemp res = newTempV128();
11749 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11750 mkexpr(argL), mkexpr(argR)));
11751 putQReg128(dd, mkexpr(res));
11752 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11753 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11754 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
11755 : (isU ? "usubl" : "ssubl");
11756 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11757 nameQReg128(dd), arrWide,
11758 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11759 return True;
11762 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
11763 /* -------- 0,0001 SADDW{2} -------- */
11764 /* -------- 1,0001 UADDW{2} -------- */
11765 /* -------- 0,0011 SSUBW{2} -------- */
11766 /* -------- 1,0011 USUBW{2} -------- */
11767 /* Widens, and size refers to the narrow lanes. */
11768 if (size == X11) return False;
11769 vassert(size <= 2);
11770 Bool isU = bitU == 1;
11771 Bool isADD = opcode == BITS4(0,0,0,1);
11772 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11773 IRTemp res = newTempV128();
11774 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11775 getQReg128(nn), mkexpr(argR)));
11776 putQReg128(dd, mkexpr(res));
11777 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11778 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11779 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
11780 : (isU ? "usubw" : "ssubw");
11781 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11782 nameQReg128(dd), arrWide,
11783 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
11784 return True;
11787 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
11788 /* -------- 0,0100 ADDHN{2} -------- */
11789 /* -------- 1,0100 RADDHN{2} -------- */
11790 /* -------- 0,0110 SUBHN{2} -------- */
11791 /* -------- 1,0110 RSUBHN{2} -------- */
11792 /* Narrows, and size refers to the narrowed lanes. */
11793 if (size == X11) return False;
11794 vassert(size <= 2);
11795 const UInt shift[3] = { 8, 16, 32 };
11796 Bool isADD = opcode == BITS4(0,1,0,0);
11797 Bool isR = bitU == 1;
11798 /* Combined elements in wide lanes */
11799 IRTemp wide = newTempV128();
11800 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11801 getQReg128(nn), getQReg128(mm));
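      /* For the rounding variants, the rounding bias 1 << (shift-1), i.e.
         half the weight of the lowest retained bit, is added in before the
         high halves are extracted. */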
11802 if (isR) {
11803 wideE = binop(mkVecADD(size+1),
11804 wideE,
11805 mkexpr(math_VEC_DUP_IMM(size+1,
11806 1ULL << (shift[size]-1))));
11808 assign(wide, wideE);
11809 /* Top halves of elements, still in wide lanes */
11810 IRTemp shrd = newTempV128();
11811 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
11812 /* Elements now compacted into lower 64 bits */
11813 IRTemp new64 = newTempV128();
11814 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
11815 putLO64andZUorPutHI64(is2, dd, new64);
11816 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11817 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11818 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
11819 : (isR ? "rsubhn" : "subhn");
11820 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11821 nameQReg128(dd), arrNarrow,
11822 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11823 return True;
11826 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
11827 /* -------- 0,0101 SABAL{2} -------- */
11828 /* -------- 1,0101 UABAL{2} -------- */
11829 /* -------- 0,0111 SABDL{2} -------- */
11830 /* -------- 1,0111 UABDL{2} -------- */
11831 /* Widens, and size refers to the narrow lanes. */
11832 if (size == X11) return False;
11833 vassert(size <= 2);
11834 Bool isU = bitU == 1;
11835 Bool isACC = opcode == BITS4(0,1,0,1);
11836 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11837 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11838 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
11839 IRTemp res = newTempV128();
11840 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
11841 : mkexpr(abd));
11842 putQReg128(dd, mkexpr(res));
11843 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11844 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11845 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
11846 : (isU ? "uabdl" : "sabdl");
11847 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11848 nameQReg128(dd), arrWide,
11849 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11850 return True;
11853 if (opcode == BITS4(1,1,0,0)
11854 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
11855 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
11856 /* -------- 1,1100 UMULL{2} -------- */ // 0
11857 /* -------- 0,1000 SMLAL{2} -------- */ // 1
11858 /* -------- 1,1000 UMLAL{2} -------- */ // 1
11859 /* -------- 0,1010 SMLSL{2} -------- */ // 2
11860 /* -------- 1,1010 UMLSL{2} -------- */ // 2
11861 /* Widens, and size refers to the narrow lanes. */
11862 UInt ks = 3;
11863 switch (opcode) {
11864 case BITS4(1,1,0,0): ks = 0; break;
11865 case BITS4(1,0,0,0): ks = 1; break;
11866 case BITS4(1,0,1,0): ks = 2; break;
11867 default: vassert(0);
11869 vassert(ks >= 0 && ks <= 2);
11870 if (size == X11) return False;
11871 vassert(size <= 2);
11872 Bool isU = bitU == 1;
11873 IRTemp vecN = newTempV128();
11874 IRTemp vecM = newTempV128();
11875 IRTemp vecD = newTempV128();
11876 assign(vecN, getQReg128(nn));
11877 assign(vecM, getQReg128(mm));
11878 assign(vecD, getQReg128(dd));
11879 IRTemp res = IRTemp_INVALID;
11880 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
11881 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11882 putQReg128(dd, mkexpr(res));
11883 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11884 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11885 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
11886 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
11887 nameQReg128(dd), arrWide,
11888 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11889 return True;
11892 if (bitU == 0
11893 && (opcode == BITS4(1,1,0,1)
11894 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
11895 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
11896 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
11897 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
11898 /* Widens, and size refers to the narrow lanes. */
11899 UInt ks = 3;
11900 switch (opcode) {
11901 case BITS4(1,1,0,1): ks = 0; break;
11902 case BITS4(1,0,0,1): ks = 1; break;
11903 case BITS4(1,0,1,1): ks = 2; break;
11904 default: vassert(0);
11906 vassert(ks >= 0 && ks <= 2);
11907 if (size == X00 || size == X11) return False;
11908 vassert(size <= 2);
11909 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11910 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11911 newTempsV128_3(&vecN, &vecM, &vecD);
11912 assign(vecN, getQReg128(nn));
11913 assign(vecM, getQReg128(mm));
11914 assign(vecD, getQReg128(dd));
11915 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11916 is2, size, "mas"[ks],
11917 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11918 putQReg128(dd, mkexpr(res));
11919 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11920 updateQCFLAGwithDifference(sat1q, sat1n);
11921 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11922 updateQCFLAGwithDifference(sat2q, sat2n);
11924 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11925 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11926 const HChar* nm = ks == 0 ? "sqdmull"
11927 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11928 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11929 nameQReg128(dd), arrWide,
11930 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11931 return True;
11934 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11935 /* -------- 0,1110 PMULL{2} -------- */
11936 /* Widens, and size refers to the narrow lanes. */
11937 if (size != X00 && size != X11) return False;
11938 IRTemp res = IRTemp_INVALID;
11939 IRExpr* srcN = getQReg128(nn);
11940 IRExpr* srcM = getQReg128(mm);
11941 const HChar* arrNarrow = NULL;
11942 const HChar* arrWide = NULL;
11943 if (size == X00) {
11944 res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11945 srcN, srcM);
11946 arrNarrow = nameArr_Q_SZ(bitQ, size);
11947 arrWide = nameArr_Q_SZ(1, size+1);
11948 } else {
11949 /* The same thing as the X00 case, except we have to call
11950 a helper to do it. */
11951 vassert(size == X11);
11952 res = newTemp(Ity_V128);
11953 IROp slice
11954 = is2 ? Iop_V128HIto64 : Iop_V128to64;
11955 IRExpr** args
11956 = mkIRExprVec_3( IRExpr_VECRET(),
11957 unop(slice, srcN), unop(slice, srcM));
11958 IRDirty* di
11959 = unsafeIRDirty_1_N( res, 0/*regparms*/,
11960 "arm64g_dirtyhelper_PMULLQ",
11961 &arm64g_dirtyhelper_PMULLQ, args);
11962 stmt(IRStmt_Dirty(di));
11963 /* We can't use nameArr_Q_SZ for this because it can't deal with
11964 Q-sized (128 bit) results. Hence do it by hand. */
11965 arrNarrow = bitQ == 0 ? "1d" : "2d";
11966 arrWide = "1q";
11968 putQReg128(dd, mkexpr(res));
11969 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11970 nameQReg128(dd), arrWide,
11971 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11972 return True;
11975 return False;
11976 # undef INSN
11980 static
11981 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11983 /* 31 30 29 28 23 21 20 15 10 9 4
11984 0 Q U 01110 size 1 m opcode 1 n d
11985 Decode fields: u,size,opcode
11987 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11988 if (INSN(31,31) != 0
11989 || INSN(28,24) != BITS5(0,1,1,1,0)
11990 || INSN(21,21) != 1
11991 || INSN(10,10) != 1) {
11992 return False;
11994 UInt bitQ = INSN(30,30);
11995 UInt bitU = INSN(29,29);
11996 UInt size = INSN(23,22);
11997 UInt mm = INSN(20,16);
11998 UInt opcode = INSN(15,11);
11999 UInt nn = INSN(9,5);
12000 UInt dd = INSN(4,0);
12001 vassert(size < 4);
12003 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
12004 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
12005 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
12006 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
12007 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
12008 if (size == X11) return False;
12009 Bool isADD = opcode == BITS5(0,0,0,0,0);
12010 Bool isU = bitU == 1;
12011 /* Widen both args out, do the math, narrow to final result. */
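/* Example: UHADD.8B on lanes 0xFF and 0x01 must produce (255+1) >> 1 = 0x80;
   an 8-bit add would overflow first, hence the widen, add-or-sub, shift right
   by one, then narrow sequence below. */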
12012 IRTemp argL = newTempV128();
12013 IRTemp argLhi = IRTemp_INVALID;
12014 IRTemp argLlo = IRTemp_INVALID;
12015 IRTemp argR = newTempV128();
12016 IRTemp argRhi = IRTemp_INVALID;
12017 IRTemp argRlo = IRTemp_INVALID;
12018 IRTemp resHi = newTempV128();
12019 IRTemp resLo = newTempV128();
12020 IRTemp res = IRTemp_INVALID;
12021 assign(argL, getQReg128(nn));
12022 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
12023 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
12024 assign(argR, getQReg128(mm));
12025 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
12026 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
12027 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
12028 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
12029 assign(resHi, binop(opSxR,
12030 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
12031 mkU8(1)));
12032 assign(resLo, binop(opSxR,
12033 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
12034 mkU8(1)));
12035 res = math_NARROW_LANES ( resHi, resLo, size );
12036 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12037 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
12038 : (isU ? "uhsub" : "shsub");
12039 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12040 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12041 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12042 return True;
12045 if (opcode == BITS5(0,0,0,1,0)) {
12046 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
12047 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
12048 if (bitQ == 0 && size == X11) return False; // implied 1d case
12049 Bool isU = bitU == 1;
12050 IRTemp argL = newTempV128();
12051 IRTemp argR = newTempV128();
12052 assign(argL, getQReg128(nn));
12053 assign(argR, getQReg128(mm));
12054 IRTemp res = math_RHADD(size, isU, argL, argR);
12055 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12056 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12057 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
12058 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12059 return True;
12062 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
12063 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
12064 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
12065 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
12066 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
12067 if (bitQ == 0 && size == X11) return False; // implied 1d case
12068 Bool isADD = opcode == BITS5(0,0,0,0,1);
12069 Bool isU = bitU == 1;
12070 IROp qop = Iop_INVALID;
12071 IROp nop = Iop_INVALID;
12072 if (isADD) {
12073 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
12074 nop = mkVecADD(size);
12075 } else {
12076 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
12077 nop = mkVecSUB(size);
12079 IRTemp argL = newTempV128();
12080 IRTemp argR = newTempV128();
12081 IRTemp qres = newTempV128();
12082 IRTemp nres = newTempV128();
12083 assign(argL, getQReg128(nn));
12084 assign(argR, getQReg128(mm));
12085 assign(qres, math_MAYBE_ZERO_HI64_fromE(
12086 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
12087 assign(nres, math_MAYBE_ZERO_HI64_fromE(
12088 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
12089 putQReg128(dd, mkexpr(qres));
12090 updateQCFLAGwithDifference(qres, nres);
12091 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
12092 : (isU ? "uqsub" : "sqsub");
12093 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12094 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12095 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12096 return True;
12099 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
12100 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
12101 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
12102 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
12103 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
12104 Bool isORx = (size & 2) == 2;
12105 Bool invert = (size & 1) == 1;
12106 IRTemp res = newTempV128();
12107 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
12108 getQReg128(nn),
12109 invert ? unop(Iop_NotV128, getQReg128(mm))
12110 : getQReg128(mm)));
12111 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12112 const HChar* names[4] = { "and", "bic", "orr", "orn" };
12113 const HChar* ar = bitQ == 1 ? "16b" : "8b";
12114 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
12115 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
12116 return True;
12119 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
12120 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
12121 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
12122 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
12123 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
12124 IRTemp argD = newTempV128();
12125 IRTemp argN = newTempV128();
12126 IRTemp argM = newTempV128();
12127 assign(argD, getQReg128(dd));
12128 assign(argN, getQReg128(nn));
12129 assign(argM, getQReg128(mm));
12130 const IROp opXOR = Iop_XorV128;
12131 const IROp opAND = Iop_AndV128;
12132 const IROp opNOT = Iop_NotV128;
12133 IRTemp res = newTempV128();
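/* The three select forms below rely on the identity x ^ ((x ^ y) & m), which
   gives y where m's bits are 1 and x where they are 0.  Hence:
     BSL: take N where D==1, M where D==0 (D is both selector and result)
     BIT: take N where M==1, keep D where M==0
     BIF: take N where M==0, keep D where M==1 */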
12134 switch (size) {
12135 case BITS2(0,0): /* EOR */
12136 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
12137 break;
12138 case BITS2(0,1): /* BSL */
12139 assign(res, binop(opXOR, mkexpr(argM),
12140 binop(opAND,
12141 binop(opXOR, mkexpr(argM), mkexpr(argN)),
12142 mkexpr(argD))));
12143 break;
12144 case BITS2(1,0): /* BIT */
12145 assign(res, binop(opXOR, mkexpr(argD),
12146 binop(opAND,
12147 binop(opXOR, mkexpr(argD), mkexpr(argN)),
12148 mkexpr(argM))));
12149 break;
12150 case BITS2(1,1): /* BIF */
12151 assign(res, binop(opXOR, mkexpr(argD),
12152 binop(opAND,
12153 binop(opXOR, mkexpr(argD), mkexpr(argN)),
12154 unop(opNOT, mkexpr(argM)))));
12155 break;
12156 default:
12157 vassert(0);
12159 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12160 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
12161 const HChar* arr = bitQ == 1 ? "16b" : "8b";
12162 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
12163 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12164 return True;
12167 if (opcode == BITS5(0,0,1,1,0)) {
12168 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
12169 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
12170 if (bitQ == 0 && size == X11) return False; // implied 1d case
12171 Bool isGT = bitU == 0;
12172 IRExpr* argL = getQReg128(nn);
12173 IRExpr* argR = getQReg128(mm);
12174 IRTemp res = newTempV128();
12175 assign(res,
12176 isGT ? binop(mkVecCMPGTS(size), argL, argR)
12177 : binop(mkVecCMPGTU(size), argL, argR));
12178 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12179 const HChar* nm = isGT ? "cmgt" : "cmhi";
12180 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12181 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12182 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12183 return True;
12186 if (opcode == BITS5(0,0,1,1,1)) {
12187 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
12188 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
12189 if (bitQ == 0 && size == X11) return False; // implied 1d case
12190 Bool isGE = bitU == 0;
12191 IRExpr* argL = getQReg128(nn);
12192 IRExpr* argR = getQReg128(mm);
12193 IRTemp res = newTempV128();
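/* a >= b is computed as NOT(b > a), in both the signed and unsigned cases. */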
12194 assign(res,
12195 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
12196 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
12197 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12198 const HChar* nm = isGE ? "cmge" : "cmhs";
12199 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12200 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12201 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12202 return True;
12205 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
12206 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
12207 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
12208 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
12209 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
12210 if (bitQ == 0 && size == X11) return False; // implied 1d case
12211 Bool isU = bitU == 1;
12212 Bool isR = opcode == BITS5(0,1,0,1,0);
12213 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
12214 : (isU ? mkVecSHU(size) : mkVecSHS(size));
12215 IRTemp res = newTempV128();
12216 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12217 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12218 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
12219 : (isU ? "ushl" : "sshl");
12220 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12221 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12222 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12223 return True;
12226 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
12227 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
12228 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
12229 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
12230 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
12231 if (bitQ == 0 && size == X11) return False; // implied 1d case
12232 Bool isU = bitU == 1;
12233 Bool isR = opcode == BITS5(0,1,0,1,1);
12234 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
12235 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
12236 /* This is a bit tricky. If we're only interested in the lowest 64 bits
12237 of the result (viz, bitQ == 0), then we must adjust the operands to
12238 ensure that the upper part of the result, that we don't care about,
12239 doesn't pollute the returned Q value. To do this, zero out the upper
12240 operand halves beforehand. This works because it means, for the
12241 lanes we don't care about, we are shifting zero by zero, which can
12242 never saturate. */
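/* The mkVecQAND*SH primops evidently return a 256-bit pair: Iop_V256toV128_0
   extracts the shifted result proper, and Iop_V256toV128_1 extracts a
   saturation indicator, any nonzero bit of which sets QC below. */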
12243 IRTemp res256 = newTemp(Ity_V256);
12244 IRTemp resSH = newTempV128();
12245 IRTemp resQ = newTempV128();
12246 IRTemp zero = newTempV128();
12247 assign(res256, binop(op,
12248 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
12249 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
12250 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
12251 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
12252 assign(zero, mkV128(0x0000));
12253 putQReg128(dd, mkexpr(resSH));
12254 updateQCFLAGwithDifference(resQ, zero);
12255 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
12256 : (isU ? "uqshl" : "sqshl");
12257 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12258 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12259 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12260 return True;
12263 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
12264 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
12265 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
12266 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
12267 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
12268 if (bitQ == 0 && size == X11) return False; // implied 1d case
12269 Bool isU = bitU == 1;
12270 Bool isMAX = (opcode & 1) == 0;
12271 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
12272 : (isU ? mkVecMINU(size) : mkVecMINS(size));
12273 IRTemp t = newTempV128();
12274 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
12275 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
12276 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
12277 : (isU ? "umin" : "smin");
12278 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12279 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12280 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12281 return True;
12284 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
12285 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
12286 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
12287 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
12288 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
12289 if (size == X11) return False; // 1d/2d cases not allowed
12290 Bool isU = bitU == 1;
12291 Bool isACC = opcode == BITS5(0,1,1,1,1);
12292 vassert(size <= 2);
12293 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
12294 IRTemp t2 = newTempV128();
12295 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
12296 : mkexpr(t1));
12297 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12298 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
12299 : (isU ? "uabd" : "sabd");
12300 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12301 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12302 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12303 return True;
12306 if (opcode == BITS5(1,0,0,0,0)) {
12307 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
12308 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
12309 if (bitQ == 0 && size == X11) return False; // implied 1d case
12310 Bool isSUB = bitU == 1;
12311 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
12312 IRTemp t = newTempV128();
12313 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
12314 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
12315 const HChar* nm = isSUB ? "sub" : "add";
12316 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12317 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12318 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12319 return True;
12322 if (opcode == BITS5(1,0,0,0,1)) {
12323 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
12324 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
12325 if (bitQ == 0 && size == X11) return False; // implied 1d case
12326 Bool isEQ = bitU == 1;
12327 IRExpr* argL = getQReg128(nn);
12328 IRExpr* argR = getQReg128(mm);
12329 IRTemp res = newTempV128();
12330 assign(res,
12331 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12332 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
12333 binop(Iop_AndV128, argL, argR),
12334 mkV128(0x0000))));
12335 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12336 const HChar* nm = isEQ ? "cmeq" : "cmtst";
12337 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12338 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12339 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12340 return True;
12343 if (opcode == BITS5(1,0,0,1,0)) {
12344 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
12345 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
12346 if (bitQ == 0 && size == X11) return False; // implied 1d case
12347 Bool isMLS = bitU == 1;
12348 IROp opMUL = mkVecMUL(size);
12349 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
12350 IRTemp res = newTempV128();
12351 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
12352 assign(res, binop(opADDSUB,
12353 getQReg128(dd),
12354 binop(opMUL, getQReg128(nn), getQReg128(mm))));
12355 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12356 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12357 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
12358 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12359 return True;
12361 return False;
12364 if (opcode == BITS5(1,0,0,1,1)) {
12365 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
12366 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
12367 if (bitQ == 0 && size == X11) return False; // implied 1d case
12368 Bool isPMUL = bitU == 1;
12369 const IROp opsPMUL[4]
12370 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
12371 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
12372 IRTemp res = newTempV128();
12373 if (opMUL != Iop_INVALID) {
12374 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
12375 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12376 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12377 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
12378 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12379 return True;
12381 return False;
12384 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
12385 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
12386 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
12387 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
12388 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
12389 if (size == X11) return False;
12390 Bool isU = bitU == 1;
12391 Bool isMAX = opcode == BITS5(1,0,1,0,0);
12392 IRTemp vN = newTempV128();
12393 IRTemp vM = newTempV128();
12394 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
12395 : (isU ? mkVecMINU(size) : mkVecMINS(size));
12396 assign(vN, getQReg128(nn));
12397 assign(vM, getQReg128(mm));
12398 IRTemp res128 = newTempV128();
12399 assign(res128,
12400 binop(op,
12401 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
12402 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
12403 /* In the half-width case, use CatEL32x4 to extract the half-width
12404 result from the full-width result. */
12405 IRExpr* res
12406 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
12407 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
12408 mkexpr(res128)))
12409 : mkexpr(res128);
12410 putQReg128(dd, res);
12411 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12412 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
12413 : (isU ? "uminp" : "sminp");
12414 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12415 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12416 return True;
12419 if (opcode == BITS5(1,0,1,1,0)) {
12420 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
12421 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
12422 if (size == X00 || size == X11) return False;
12423 Bool isR = bitU == 1;
12424 IRTemp res, sat1q, sat1n, vN, vM;
12425 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12426 newTempsV128_2(&vN, &vM);
12427 assign(vN, getQReg128(nn));
12428 assign(vM, getQReg128(mm));
12429 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12430 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12431 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
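/* When Q == 0 only the lower 64 bits are architecturally written, so the
   saturation check is likewise restricted to the lower half by zeroing the
   upper halves of both comparands first. */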
12432 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12433 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12434 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
12435 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12436 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12437 return True;
12440 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
12441 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
12442 if (bitQ == 0 && size == X11) return False; // implied 1d case
12443 IRTemp vN = newTempV128();
12444 IRTemp vM = newTempV128();
12445 assign(vN, getQReg128(nn));
12446 assign(vM, getQReg128(mm));
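/* Pairwise trick: CatEvenLanes and CatOddLanes split the concatenation Vm:Vn
   into its even- and odd-numbered lanes, lining each lane up with its
   neighbour, so a single vector ADD yields every pairwise sum at once.  For
   the 4s case the result is { m3+m2, m1+m0, n3+n2, n1+n0 } (high lane to
   low), which is exactly what ADDP requires. */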
12447 IRTemp res128 = newTempV128();
12448 assign(res128,
12449 binop(mkVecADD(size),
12450 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
12451 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
12452 /* In the half-width case, use CatEL32x4 to extract the half-width
12453 result from the full-width result. */
12454 IRExpr* res
12455 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
12456 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
12457 mkexpr(res128)))
12458 : mkexpr(res128);
12459 putQReg128(dd, res);
12460 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12461 DIP("addp %s.%s, %s.%s, %s.%s\n",
12462 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12463 return True;
12466 if (bitU == 0
12467 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12468 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12469 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12470 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12471 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12472 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
12473 Bool isD = (size & 1) == 1;
12474 if (bitQ == 0 && isD) return False; // implied 1d case
12475 Bool isMIN = (size & 2) == 2;
12476 Bool isNM = opcode == BITS5(1,1,0,0,0);
12477 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
12478 IRTemp res = newTempV128();
12479 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
12480 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12481 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12482 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
12483 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12484 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12485 return True;
12488 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
12489 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12490 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12491 Bool isD = (size & 1) == 1;
12492 Bool isSUB = (size & 2) == 2;
12493 if (bitQ == 0 && isD) return False; // implied 1d case
12494 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12495 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12496 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12497 IRTemp rm = mk_get_IR_rounding_mode();
12498 IRTemp t1 = newTempV128();
12499 IRTemp t2 = newTempV128();
12500 // FIXME: double rounding; use FMA primops instead
12501 assign(t1, triop(opMUL,
12502 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12503 assign(t2, triop(isSUB ? opSUB : opADD,
12504 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12505 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12506 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12507 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
12508 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12509 return True;
12512 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
12513 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12514 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12515 Bool isD = (size & 1) == 1;
12516 Bool isSUB = (size & 2) == 2;
12517 if (bitQ == 0 && isD) return False; // implied 1d case
12518 const IROp ops[4]
12519 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
12520 IROp op = ops[size];
12521 IRTemp rm = mk_get_IR_rounding_mode();
12522 IRTemp t1 = newTempV128();
12523 IRTemp t2 = newTempV128();
12524 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12525 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12526 putQReg128(dd, mkexpr(t2));
12527 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12528 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
12529 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12530 return True;
12533 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
12534 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12535 Bool isD = (size & 1) == 1;
12536 if (bitQ == 0 && isD) return False; // implied 1d case
12537 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12538 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12539 IRTemp rm = mk_get_IR_rounding_mode();
12540 IRTemp t1 = newTempV128();
12541 IRTemp t2 = newTempV128();
12542 // FIXME: use Abd primop instead?
12543 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12544 assign(t2, unop(opABS, mkexpr(t1)));
12545 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12546 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12547 DIP("fabd %s.%s, %s.%s, %s.%s\n",
12548 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12549 return True;
12552 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
12553 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12554 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12555 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12556 Bool isD = (size & 1) == 1;
12557 Bool isMULX = bitU == 0;
12558 if (bitQ == 0 && isD) return False; // implied 1d case
12559 IRTemp rm = mk_get_IR_rounding_mode();
12560 IRTemp t1 = newTempV128();
12561 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12562 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12563 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12564 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12565 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
12566 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12567 return True;
12570 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
12571 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12572 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12573 Bool isD = (size & 1) == 1;
12574 if (bitQ == 0 && isD) return False; // implied 1d case
12575 Bool isGE = bitU == 1;
12576 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
12577 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
12578 IRTemp t1 = newTempV128();
12579 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
12580 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
12581 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12582 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12583 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
12584 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12585 return True;
12588 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
12589 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12590 Bool isD = (size & 1) == 1;
12591 if (bitQ == 0 && isD) return False; // implied 1d case
12592 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12593 IRTemp t1 = newTempV128();
12594 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
12595 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12596 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12597 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
12598 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12599 return True;
12602 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
12603 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12604 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12605 Bool isD = (size & 1) == 1;
12606 Bool isGT = (size & 2) == 2;
12607 if (bitQ == 0 && isD) return False; // implied 1d case
12608 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
12609 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
12610 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12611 IRTemp t1 = newTempV128();
12612 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
12613 unop(opABS, getQReg128(nn)))); // swapd
12614 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12615 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12616 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
12617 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12618 return True;
12621 if (bitU == 1
12622 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12623 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12624 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12625 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12626 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12627 /* FMAXNMP, FMINNMP: FIXME -- KLUDGED */
12628 Bool isD = (size & 1) == 1;
12629 if (bitQ == 0 && isD) return False; // implied 1d case
12630 Bool isMIN = (size & 2) == 2;
12631 Bool isNM = opcode == BITS5(1,1,0,0,0);
12632 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
12633 IRTemp srcN = newTempV128();
12634 IRTemp srcM = newTempV128();
12635 IRTemp preL = IRTemp_INVALID;
12636 IRTemp preR = IRTemp_INVALID;
12637 assign(srcN, getQReg128(nn));
12638 assign(srcM, getQReg128(mm));
12639 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
12640 isD ? ARM64VSizeD : ARM64VSizeS, bitQ);
12641 putQReg128(
12642 dd, math_MAYBE_ZERO_HI64_fromE(
12643 bitQ,
12644 binop(opMXX, mkexpr(preL), mkexpr(preR))));
12645 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12646 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
12647 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12648 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12649 return True;
12652 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
12653 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12654 Bool isD = size == X01;
12655 if (bitQ == 0 && isD) return False; // implied 1d case
12656 IRTemp srcN = newTempV128();
12657 IRTemp srcM = newTempV128();
12658 IRTemp preL = IRTemp_INVALID;
12659 IRTemp preR = IRTemp_INVALID;
12660 assign(srcN, getQReg128(nn));
12661 assign(srcM, getQReg128(mm));
12662 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
12663 isD ? ARM64VSizeD : ARM64VSizeS, bitQ);
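/* math_REARRANGE_FOR_FLOATING_PAIRWISE evidently rearranges the elements of
   Vm:Vn so that each element of preL sits opposite its pairwise partner in
   preR; the single rounding-mode-aware vector FADD below then produces all
   the pairwise sums in one go. */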
12664 putQReg128(
12665 dd, math_MAYBE_ZERO_HI64_fromE(
12666 bitQ,
12667 triop(mkVecADDF(isD ? 3 : 2),
12668 mkexpr(mk_get_IR_rounding_mode()),
12669 mkexpr(preL), mkexpr(preR))));
12670 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12671 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
12672 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12673 return True;
12676 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
12677 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12678 Bool isD = (size & 1) == 1;
12679 if (bitQ == 0 && isD) return False; // implied 1d case
12680 vassert(size <= 1);
12681 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
12682 IROp op = ops[size];
12683 IRTemp rm = mk_get_IR_rounding_mode();
12684 IRTemp t1 = newTempV128();
12685 IRTemp t2 = newTempV128();
12686 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12687 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12688 putQReg128(dd, mkexpr(t2));
12689 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12690 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
12691 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12692 return True;
12695 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
12696 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12697 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12698 Bool isSQRT = (size & 2) == 2;
12699 Bool isD = (size & 1) == 1;
12700 if (bitQ == 0 && isD) return False; // implied 1d case
12701 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
12702 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
12703 IRTemp res = newTempV128();
12704 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12705 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12706 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12707 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
12708 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12709 return True;
12712 return False;
12713 # undef INSN
12717 static
12718 Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
12720 /* 31 30 29 28 23 21 20 15 14 10 9 4
12721 0 Q U 01110 size 0 m 1 opcode 1 n d
12722 Decode fields: u,size,opcode
12724 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12725 if (INSN(31,31) != 0
12726 || INSN(28,24) != BITS5(0,1,1,1,0)
12727 || INSN(21,21) != 0
12728 || INSN(15,15) != 1
12729 || INSN(10,10) != 1) {
12730 return False;
12732 UInt bitQ = INSN(30,30);
12733 UInt bitU = INSN(29,29);
12734 UInt size = INSN(23,22);
12735 UInt mm = INSN(20,16);
12736 UInt opcode = INSN(14,11);
12737 UInt nn = INSN(9,5);
12738 UInt dd = INSN(4,0);
12739 vassert(size < 4);
12740 vassert(mm < 32 && nn < 32 && dd < 32);
12742 if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
12743 /* -------- 1,xx,0000 SQRDMLAH s and h variants only -------- */
12744 /* -------- 1,xx,0001 SQRDMLSH s and h variants only -------- */
12745 if (size == X00 || size == X11) return False;
12746 Bool isAdd = opcode == BITS4(0,0,0,0);
12748 IRTemp res, res_nosat, vD, vN, vM;
12749 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
12750 newTempsV128_3(&vD, &vN, &vM);
12751 assign(vD, getQReg128(dd));
12752 assign(vN, getQReg128(nn));
12753 assign(vM, getQReg128(mm));
12755 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
12756 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12757 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
12758 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12760 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12761 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
12762 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12763 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12764 return True;
12767 return False;
12768 # undef INSN
12771 static
12772 Bool dis_AdvSIMD_three_same_fp16(/*MB_OUT*/DisResult* dres, UInt insn)
12774 /* 31 30 29 28 23 21 20 15 10 9 4
12775 0 Q U 01110 size 0 m opcode 1 n d
12776 Decode fields: u,size,opcode
12778 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12779 if (INSN(31,31) != 0
12780 || INSN(28,24) != BITS5(0,1,1,1,0)
12781 || INSN(21,21) != 0
12782 || INSN(10,10) != 1) {
12783 return False;
12785 UInt bitQ = INSN(30,30);
12786 UInt bitU = INSN(29,29);
12787 UInt size = INSN(23,22);
12788 UInt mm = INSN(20,16);
12789 UInt opcode = INSN(15,11);
12790 UInt nn = INSN(9,5);
12791 UInt dd = INSN(4,0);
12792 vassert(size < 4);
12793 vassert(mm < 32 && nn < 32 && dd < 32);
12795 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,0,1,0)) {
12796 /* -------- 1,01,00010 FADDP 4h_4h_4h, 8h_8h_8h -------- */
12797 IROp opADD = mkVecADDF(1); /* 16-bit (half precision) lanes, both 4h and 8h */
12798 IRTemp srcN = newTempV128();
12799 IRTemp srcM = newTempV128();
12800 IRTemp preL = IRTemp_INVALID;
12801 IRTemp preR = IRTemp_INVALID;
12802 assign(srcN, getQReg128(nn));
12803 assign(srcM, getQReg128(mm));
12804 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
12805 ARM64VSizeH, bitQ);
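/* For example, FADDP v0.4h, v1.4h, v2.4h yields
     v0.h[0] = v1.h[1] + v1.h[0],   v0.h[1] = v1.h[3] + v1.h[2],
     v0.h[2] = v2.h[1] + v2.h[0],   v0.h[3] = v2.h[3] + v2.h[2],
   with the upper 64 bits of v0 cleared (the Q == 0 case). */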
12806 putQReg128(
12807 dd, math_MAYBE_ZERO_HI64_fromE(
12808 bitQ,
12809 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
12810 mkexpr(preL), mkexpr(preR))));
12811 const HChar* arr = bitQ == 0 ? "4h" : "8h";
12812 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
12813 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12814 return True;
12817 return False;
12818 # undef INSN
12822 static
12823 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
12825 /* 31 30 29 28 23 21 16 11 9 4
12826 0 Q U 01110 size 10000 opcode 10 n d
12827 Decode fields: U,size,opcode
12829 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12830 if (INSN(31,31) != 0
12831 || INSN(28,24) != BITS5(0,1,1,1,0)
12832 || INSN(21,17) != BITS5(1,0,0,0,0)
12833 || INSN(11,10) != BITS2(1,0)) {
12834 return False;
12836 UInt bitQ = INSN(30,30);
12837 UInt bitU = INSN(29,29);
12838 UInt size = INSN(23,22);
12839 UInt opcode = INSN(16,12);
12840 UInt nn = INSN(9,5);
12841 UInt dd = INSN(4,0);
12842 vassert(size < 4);
12844 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
12845 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
12846 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
12847 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
12848 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
12849 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
12850 vassert(size <= 2);
12851 IRTemp res = newTempV128();
12852 assign(res, unop(iops[size], getQReg128(nn)));
12853 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12854 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12855 DIP("%s %s.%s, %s.%s\n", "rev64",
12856 nameQReg128(dd), arr, nameQReg128(nn), arr);
12857 return True;
12860 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
12861 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
12862 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
12863 Bool isH = size == X01;
12864 IRTemp res = newTempV128();
12865 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
12866 assign(res, unop(iop, getQReg128(nn)));
12867 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12868 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12869 DIP("%s %s.%s, %s.%s\n", "rev32",
12870 nameQReg128(dd), arr, nameQReg128(nn), arr);
12871 return True;
12874 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
12875 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
12876 IRTemp res = newTempV128();
12877 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
12878 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12879 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12880 DIP("%s %s.%s, %s.%s\n", "rev16",
12881 nameQReg128(dd), arr, nameQReg128(nn), arr);
12882 return True;
12885 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
12886 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
12887 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
12888 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
12889 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
12890 /* Widens, and size refers to the narrow size. */
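/* E.g. SADDLP Vd.4s, Vn.8h computes Vd.s[i] = sext(Vn.h[2i]) + sext(Vn.h[2i+1]);
   the xADALP forms additionally accumulate the sums into Vd. */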
12891 if (size == X11) return False; // no 1d or 2d cases
12892 Bool isU = bitU == 1;
12893 Bool isACC = opcode == BITS5(0,0,1,1,0);
12894 IRTemp src = newTempV128();
12895 IRTemp sum = newTempV128();
12896 IRTemp res = newTempV128();
12897 assign(src, getQReg128(nn));
12898 assign(sum,
12899 binop(mkVecADD(size+1),
12900 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12901 isU, True/*fromOdd*/, size, mkexpr(src))),
12902 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12903 isU, False/*!fromOdd*/, size, mkexpr(src)))));
12904 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
12905 : mkexpr(sum));
12906 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12907 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12908 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
12909 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
12910 : (isU ? "uaddlp" : "saddlp"),
12911 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12912 return True;
12915 if (opcode == BITS5(0,0,0,1,1)) {
12916 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
12917 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
12918 if (bitQ == 0 && size == X11) return False; // implied 1d case
12919 Bool isUSQADD = bitU == 1;
12920 /* This is switched (in the US vs SU sense) deliberately.
12921 SUQADD corresponds to the ExtUSsatSS variants and
12922 USQADD corresponds to the ExtSUsatUU variants.
12923 See libvex_ir for more details. */
12924 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
12925 : mkVecQADDEXTUSSATSS(size);
12926 IROp nop = mkVecADD(size);
12927 IRTemp argL = newTempV128();
12928 IRTemp argR = newTempV128();
12929 IRTemp qres = newTempV128();
12930 IRTemp nres = newTempV128();
12931 /* Because the two arguments to the addition are implicitly
12932 extended differently (one signedly, the other unsignedly) it is
12933 important to present them to the primop in the correct order. */
12934 assign(argL, getQReg128(nn));
12935 assign(argR, getQReg128(dd));
12936 assign(qres, math_MAYBE_ZERO_HI64_fromE(
12937 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
12938 assign(nres, math_MAYBE_ZERO_HI64_fromE(
12939 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
12940 putQReg128(dd, mkexpr(qres));
12941 updateQCFLAGwithDifference(qres, nres);
12942 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12943 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
12944 nameQReg128(dd), arr, nameQReg128(nn), arr);
12945 return True;
12948 if (opcode == BITS5(0,0,1,0,0)) {
12949 /* -------- 0,xx,00100: CLS std6_std6 -------- */
12950 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
12951 if (size == X11) return False; // no 1d or 2d cases
12952 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
12953 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
12954 Bool isCLZ = bitU == 1;
12955 IRTemp res = newTempV128();
12956 vassert(size <= 2);
12957 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
12958 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12959 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12960 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
12961 nameQReg128(dd), arr, nameQReg128(nn), arr);
12962 return True;
12965 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
12966 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
12967 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
12968 IRTemp res = newTempV128();
12969 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
12970 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12971 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12972 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
12973 nameQReg128(dd), arr, nameQReg128(nn), arr);
12974 return True;
12977 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
12978 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
12979 IRTemp res = newTempV128();
12980 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
12981 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12982 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12983 DIP("%s %s.%s, %s.%s\n", "rbit",
12984 nameQReg128(dd), arr, nameQReg128(nn), arr);
12985 return True;
12988 if (opcode == BITS5(0,0,1,1,1)) {
12989 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
12990 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
12991 if (bitQ == 0 && size == X11) return False; // implied 1d case
12992 Bool isNEG = bitU == 1;
12993 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
12994 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
12995 getQReg128(nn), size );
12996 IRTemp qres = newTempV128(), nres = newTempV128();
12997 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
12998 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
12999 putQReg128(dd, mkexpr(qres));
13000 updateQCFLAGwithDifference(qres, nres);
13001 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13002 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
13003 nameQReg128(dd), arr, nameQReg128(nn), arr);
13004 return True;
13007 if (opcode == BITS5(0,1,0,0,0)) {
13008 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
13009 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
13010 if (bitQ == 0 && size == X11) return False; // implied 1d case
13011 Bool isGT = bitU == 0;
13012 IRExpr* argL = getQReg128(nn);
13013 IRExpr* argR = mkV128(0x0000);
13014 IRTemp res = newTempV128();
13015 IROp opGTS = mkVecCMPGTS(size);
13016 assign(res, isGT ? binop(opGTS, argL, argR)
13017 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
13018 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13019 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13020 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
13021 nameQReg128(dd), arr, nameQReg128(nn), arr);
13022 return True;
13025 if (opcode == BITS5(0,1,0,0,1)) {
13026 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
13027 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
13028 if (bitQ == 0 && size == X11) return False; // implied 1d case
13029 Bool isEQ = bitU == 0;
13030 IRExpr* argL = getQReg128(nn);
13031 IRExpr* argR = mkV128(0x0000);
13032 IRTemp res = newTempV128();
13033 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
13034 : unop(Iop_NotV128,
13035 binop(mkVecCMPGTS(size), argL, argR)));
13036 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13037 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13038 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
13039 nameQReg128(dd), arr, nameQReg128(nn), arr);
13040 return True;
13043 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
13044 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
13045 if (bitQ == 0 && size == X11) return False; // implied 1d case
13046 IRExpr* argL = getQReg128(nn);
13047 IRExpr* argR = mkV128(0x0000);
13048 IRTemp res = newTempV128();
13049 assign(res, binop(mkVecCMPGTS(size), argR, argL));
13050 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13051 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13052 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
13053 nameQReg128(dd), arr, nameQReg128(nn), arr);
13054 return True;
13057 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
13058 /* -------- 0,xx,01011: ABS std7_std7 -------- */
13059 if (bitQ == 0 && size == X11) return False; // implied 1d case
13060 IRTemp res = newTempV128();
13061 assign(res, unop(mkVecABS(size), getQReg128(nn)));
13062 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13063 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13064 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
13065 return True;
13068 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
13069 /* -------- 1,xx,01011: NEG std7_std7 -------- */
13070 if (bitQ == 0 && size == X11) return False; // implied 1d case
13071 IRTemp res = newTempV128();
13072 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
13073 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13074 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13075 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
13076 return True;
13079 UInt ix = 0; /*INVALID*/
13080 if (size >= X10) {
13081 switch (opcode) {
13082 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
13083 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
13084 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
13085 default: break;
13088 if (ix > 0) {
13089 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
13090 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
13091 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
13092 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
13093 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
13094 if (bitQ == 0 && size == X11) return False; // implied 1d case
13095 Bool isD = size == X11;
13096 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
13097 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
13098 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
13099 IROp opCmp = Iop_INVALID;
13100 Bool swap = False;
13101 const HChar* nm = "??";
13102 switch (ix) {
13103 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
13104 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
13105 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
13106 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
13107 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
13108 default: vassert(0);
13110 IRExpr* zero = mkV128(0x0000);
13111 IRTemp res = newTempV128();
13112 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
13113 : binop(opCmp, getQReg128(nn), zero));
13114 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13115 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13116 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
13117 nameQReg128(dd), arr, nameQReg128(nn), arr);
13118 return True;
13121 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
13122 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
13123 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
13124 if (bitQ == 0 && size == X11) return False; // implied 1d case
13125 Bool isFNEG = bitU == 1;
13126 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
13127 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
13128 IRTemp res = newTempV128();
13129 assign(res, unop(op, getQReg128(nn)));
13130 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13131 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13132 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
13133 nameQReg128(dd), arr, nameQReg128(nn), arr);
13134 return True;
13137 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
13138 /* -------- 0,xx,10010: XTN{,2} -------- */
13139 if (size == X11) return False;
13140 vassert(size < 3);
13141 Bool is2 = bitQ == 1;
13142 IROp opN = mkVecNARROWUN(size);
13143 IRTemp resN = newTempV128();
13144 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
13145 putLO64andZUorPutHI64(is2, dd, resN);
13146 const HChar* nm = "xtn";
13147 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13148 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13149 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
13150 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13151 return True;
13154 if (opcode == BITS5(1,0,1,0,0)
13155 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
13156 /* -------- 0,xx,10100: SQXTN{,2} -------- */
13157 /* -------- 1,xx,10100: UQXTN{,2} -------- */
13158 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
13159 if (size == X11) return False;
13160 vassert(size < 3);
13161 Bool is2 = bitQ == 1;
13162 IROp opN = Iop_INVALID;
13163 Bool zWiden = True;
13164 const HChar* nm = "??";
13165 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
13166 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
13168 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
13169 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
13171 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
13172 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
13174 else vassert(0);
13175 IRTemp src = newTempV128();
13176 assign(src, getQReg128(nn));
13177 IRTemp resN = newTempV128();
13178 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
13179 putLO64andZUorPutHI64(is2, dd, resN);
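/* Saturation detection: re-widen the just-narrowed result (zero- or
   sign-extending according to zWiden) and compare it with the original
   source.  Any lane that had to saturate re-widens to a different value, and
   that difference sets QC. */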
13180 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
13181 size, mkexpr(resN));
13182 updateQCFLAGwithDifference(src, resW);
13183 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13184 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13185 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
13186 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13187 return True;
13190 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
13191 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
13192 /* Widens, and size is the narrow size. */
13193 if (size == X11) return False;
13194 Bool is2 = bitQ == 1;
13195 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
13196 IROp opSHL = mkVecSHLN(size+1);
13197 IRTemp src = newTempV128();
13198 IRTemp res = newTempV128();
13199 assign(src, getQReg128(nn));
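/* SHLL widens each narrow lane and shifts it left by the narrow lane width.
   Interleaving the source with itself duplicates each lane x into a
   double-width slot holding (x << laneWidth) | x; the subsequent left shift
   by laneWidth then leaves exactly x << laneWidth in each widened lane. */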
13200 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
13201 mkU8(8 << size)));
13202 putQReg128(dd, mkexpr(res));
13203 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13204 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13205 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
13206 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
13207 return True;
13210 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
13211 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
13212 UInt nLanes = size == X00 ? 4 : 2;
13213 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
13214 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
13215 IRTemp rm = mk_get_IR_rounding_mode();
13216 IRTemp src[nLanes];
13217 for (UInt i = 0; i < nLanes; i++) {
13218 src[i] = newTemp(srcTy);
13219 assign(src[i], getQRegLane(nn, i, srcTy));
13221 for (UInt i = 0; i < nLanes; i++) {
13222 putQRegLane(dd, nLanes * bitQ + i,
13223 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
13225 if (bitQ == 0) {
13226 putQRegLane(dd, 1, mkU64(0));
13228 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
13229 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
13230 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
13231 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13232 return True;
13235 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
13236 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
13237 /* Using Irrm_NEAREST here isn't right. The docs say "round to
13238 odd" but I don't know what that really means. */
13239 IRType srcTy = Ity_F64;
13240 IROp opCvt = Iop_F64toF32;
13241 IRTemp src[2];
13242 for (UInt i = 0; i < 2; i++) {
13243 src[i] = newTemp(srcTy);
13244 assign(src[i], getQRegLane(nn, i, srcTy));
13246 for (UInt i = 0; i < 2; i++) {
13247 putQRegLane(dd, 2 * bitQ + i,
13248 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
13250 if (bitQ == 0) {
13251 putQRegLane(dd, 1, mkU64(0));
13253 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
13254 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
13255 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
13256 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13257 return True;
13260 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
13261 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
13262 UInt nLanes = size == X00 ? 4 : 2;
13263 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
13264 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
13265 IRTemp src[nLanes];
13266 for (UInt i = 0; i < nLanes; i++) {
13267 src[i] = newTemp(srcTy);
13268 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
13270 for (UInt i = 0; i < nLanes; i++) {
13271 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
13273 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
13274 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
13275 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
13276 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
13277 return True;
13280 ix = 0;
13281 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
13282 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
13283 // = 1 + bitU[0]:size[1]:opcode[0]
13284 vassert(ix >= 1 && ix <= 8);
13285 if (ix == 7) ix = 0;
13287 if (ix > 0) {
13288 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
13289 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
13290 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
13291 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
13292 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
13293 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
13294 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
13295 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
13296 /* rm plan:
13297 FRINTN: tieeven -- !! FIXME KLUDGED !!
13298 FRINTM: -inf
13299 FRINTP: +inf
13300 FRINTZ: zero
13301 FRINTA: tieaway -- !! FIXME KLUDGED !!
13302 FRINTX: per FPCR + "exact = TRUE"
13303 FRINTI: per FPCR
13305 Bool isD = (size & 1) == 1;
13306 if (bitQ == 0 && isD) return False; // implied 1d case
13308 IRTemp irrmRM = mk_get_IR_rounding_mode();
13310 UChar ch = '?';
13311 IRTemp irrm = newTemp(Ity_I32);
13312 switch (ix) {
13313 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
13314 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
13315 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
13316 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
13317 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13318 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
13319 // I am unsure about the following, due to the "integral exact"
13320 // description in the manual. What does it mean? (frintx, that is)
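         // ("Integral exact" means FRINTX rounds using the FPCR rounding mode
         // and additionally signals the Inexact exception when the result is
         // not numerically equal to the input.  This translation doesn't
         // appear to track the FP exception flags, so rounding per FPCR is
         // probably the closest achievable behaviour.)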
13321 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
13322 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
13323 default: vassert(0);
13326 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
13327 if (isD) {
13328 for (UInt i = 0; i < 2; i++) {
13329 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
13330 getQRegLane(nn, i, Ity_F64)));
13332 } else {
13333 UInt n = bitQ==1 ? 4 : 2;
13334 for (UInt i = 0; i < n; i++) {
13335 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
13336 getQRegLane(nn, i, Ity_F32)));
13338 if (bitQ == 0)
13339 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
13341 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13342 DIP("frint%c %s.%s, %s.%s\n", ch,
13343 nameQReg128(dd), arr, nameQReg128(nn), arr);
13344 return True;
13347 ix = 0; /*INVALID*/
13348 switch (opcode) {
13349 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
13350 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
13351 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
13352 default: break;
13354 if (ix > 0) {
13355 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13356 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13357 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13358 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13359 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13360       /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13361       /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13362       /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13363       /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13364       /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13365 Bool isD = (size & 1) == 1;
13366 if (bitQ == 0 && isD) return False; // implied 1d case
13368 IRRoundingMode irrm = 8; /*impossible*/
13369 HChar ch = '?';
13370 switch (ix) {
13371 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
13372 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
13373 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
13374 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
13375 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
13376 default: vassert(0);
13378 IROp cvt = Iop_INVALID;
13379 if (bitU == 1) {
13380 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
13381 } else {
13382 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
13384 if (isD) {
13385 for (UInt i = 0; i < 2; i++) {
13386 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
13387 getQRegLane(nn, i, Ity_F64)));
13389 } else {
13390 UInt n = bitQ==1 ? 4 : 2;
13391 for (UInt i = 0; i < n; i++) {
13392 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
13393 getQRegLane(nn, i, Ity_F32)));
13395 if (bitQ == 0)
13396 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
13398 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13399 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
13400 nameQReg128(dd), arr, nameQReg128(nn), arr);
13401 return True;
13404 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
13405 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
13406 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
13407 Bool isREC = bitU == 0;
13408 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
13409 IRTemp res = newTempV128();
13410 assign(res, unop(op, getQReg128(nn)));
13411 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13412 const HChar* nm = isREC ? "urecpe" : "ursqrte";
13413 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13414 DIP("%s %s.%s, %s.%s\n", nm,
13415 nameQReg128(dd), arr, nameQReg128(nn), arr);
13416 return True;
13419 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
13420 /* -------- 0,0x,11101: SCVTF -------- */
13421 /* -------- 1,0x,11101: UCVTF -------- */
13422 /* 31 28 22 21 15 9 4
13423 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
13424 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
13425 with laneage:
13426 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
13428 Bool isQ = bitQ == 1;
13429 Bool isU = bitU == 1;
13430 Bool isF64 = (size & 1) == 1;
13431 if (isQ || !isF64) {
13432 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
13433 UInt nLanes = 0;
13434 Bool zeroHI = False;
13435 const HChar* arrSpec = NULL;
13436 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
13437 isQ, isF64 );
13438 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
13439 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
13440 IRTemp rm = mk_get_IR_rounding_mode();
13441 UInt i;
13442 vassert(ok); /* the 'if' above should ensure this */
13443 for (i = 0; i < nLanes; i++) {
13444 putQRegLane(dd, i,
13445 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
13447 if (zeroHI) {
13448 putQRegLane(dd, 1, mkU64(0));
13450 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
13451 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
13452 return True;
13454 /* else fall through */
13457 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
13458 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
13459 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
13460 Bool isSQRT = bitU == 1;
13461 Bool isD = (size & 1) == 1;
13462 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
13463 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
13464 if (bitQ == 0 && isD) return False; // implied 1d case
13465 IRTemp resV = newTempV128();
13466 assign(resV, unop(op, getQReg128(nn)));
13467 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
13468 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13469 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
13470 nameQReg128(dd), arr, nameQReg128(nn), arr);
13471 return True;
13474 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
13475 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
13476 Bool isD = (size & 1) == 1;
13477 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
13478 if (bitQ == 0 && isD) return False; // implied 1d case
13479 IRTemp resV = newTempV128();
13480 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
13481 getQReg128(nn)));
13482 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
13483 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13484 DIP("%s %s.%s, %s.%s\n", "fsqrt",
13485 nameQReg128(dd), arr, nameQReg128(nn), arr);
13486 return True;
13489 return False;
13490 # undef INSN
13494 static
13495 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
13497 /* 31 28 23 21 20 19 15 11 9 4
13498 0 Q U 01111 size L M m opcode H 0 n d
13499 Decode fields are: u,size,opcode
13500 M is really part of the mm register number. Individual
13501 cases need to inspect L and H though.
13503 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13504 if (INSN(31,31) != 0
13505 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
13506 return False;
13508 UInt bitQ = INSN(30,30);
13509 UInt bitU = INSN(29,29);
13510 UInt size = INSN(23,22);
13511 UInt bitL = INSN(21,21);
13512 UInt bitM = INSN(20,20);
13513 UInt mmLO4 = INSN(19,16);
13514 UInt opcode = INSN(15,12);
13515 UInt bitH = INSN(11,11);
13516 UInt nn = INSN(9,5);
13517 UInt dd = INSN(4,0);
13518 vassert(size < 4);
13519 vassert(bitH < 2 && bitM < 2 && bitL < 2);
13521 if (bitU == 0 && size >= X10
13522 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
13523 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13524 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13525 if (bitQ == 0 && size == X11) return False; // implied 1d case
13526 Bool isD = (size & 1) == 1;
13527 Bool isSUB = opcode == BITS4(0,1,0,1);
13528 UInt index;
13529 if (!isD) index = (bitH << 1) | bitL;
13530 else if (isD && bitL == 0) index = bitH;
13531 else return False; // sz:L == x11 => unallocated encoding
13532 vassert(index < (isD ? 2 : 4));
13533 IRType ity = isD ? Ity_F64 : Ity_F32;
13534 IRTemp elem = newTemp(ity);
13535 UInt mm = (bitM << 4) | mmLO4;
13536 assign(elem, getQRegLane(mm, index, ity));
13537 IRTemp dupd = math_DUP_TO_V128(elem, ity);
13538 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
13539 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
13540 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
13541 IRTemp rm = mk_get_IR_rounding_mode();
13542 IRTemp t1 = newTempV128();
13543 IRTemp t2 = newTempV128();
13544 // FIXME: double rounding; use FMA primops instead
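13544       // A possible per-lane fix (sketch only, untested): reuse the scalar
13544       // fused primops that the FMADD/FMSUB decoder already uses, e.g. for
13544       // each active lane i
13544       //    putQRegLane(dd, i,
13544       //                qop(isD ? (isSUB ? Iop_MSubF64 : Iop_MAddF64)
13544       //                        : (isSUB ? Iop_MSubF32 : Iop_MAddF32),
13544       //                    mkexpr(rm), getQRegLane(nn, i, ity), mkexpr(elem),
13544       //                    getQRegLane(dd, i, ity)));
13544       // and zero the upper half afterwards when bitQ == 0, instead of the
13544       // separate multiply-then-add below.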
13545 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
13546 assign(t2, triop(isSUB ? opSUB : opADD,
13547 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
13548 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
13549 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13550 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
13551 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
13552 isD ? 'd' : 's', index);
13553 return True;
13556 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
13557 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13558 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13559 if (bitQ == 0 && size == X11) return False; // implied 1d case
13560 Bool isD = (size & 1) == 1;
13561 Bool isMULX = bitU == 1;
13562 UInt index;
13563 if (!isD) index = (bitH << 1) | bitL;
13564 else if (isD && bitL == 0) index = bitH;
13565 else return False; // sz:L == x11 => unallocated encoding
13566 vassert(index < (isD ? 2 : 4));
13567 IRType ity = isD ? Ity_F64 : Ity_F32;
13568 IRTemp elem = newTemp(ity);
13569 UInt mm = (bitM << 4) | mmLO4;
13570 assign(elem, getQRegLane(mm, index, ity));
13571 IRTemp dupd = math_DUP_TO_V128(elem, ity);
13572 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
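      // (FMULX differs from FMUL only for 0 * inf and inf * 0, where it
      // returns +/-2.0, the sign being the XOR of the operands' signs, rather
      // than the default NaN; for all other inputs the two give identical
      // results, so the kludge is wrong only in those corner cases.)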
13573 IRTemp res = newTempV128();
13574 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
13575 mkexpr(mk_get_IR_rounding_mode()),
13576 getQReg128(nn), mkexpr(dupd)));
13577 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13578 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13579 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
13580 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
13581 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
13582 return True;
13585 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
13586 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
13587 /* -------- 1,xx,0000 MLA s/h variants only -------- */
13588 /* -------- 1,xx,0100 MLS s/h variants only -------- */
13589 /* -------- 0,xx,1000 MUL s/h variants only -------- */
13590 Bool isMLA = opcode == BITS4(0,0,0,0);
13591 Bool isMLS = opcode == BITS4(0,1,0,0);
13592 UInt mm = 32; // invalid
13593 UInt ix = 16; // invalid
13594 switch (size) {
13595 case X00:
13596 return False; // b case is not allowed
13597 case X01:
13598 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13599 case X10:
13600 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13601 case X11:
13602 return False; // d case is not allowed
13603 default:
13604 vassert(0);
13606 vassert(mm < 32 && ix < 16);
13607 IROp opMUL = mkVecMUL(size);
13608 IROp opADD = mkVecADD(size);
13609 IROp opSUB = mkVecSUB(size);
13610 HChar ch = size == X01 ? 'h' : 's';
13611 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13612 IRTemp vecD = newTempV128();
13613 IRTemp vecN = newTempV128();
13614 IRTemp res = newTempV128();
13615 assign(vecD, getQReg128(dd));
13616 assign(vecN, getQReg128(nn));
13617 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
13618 if (isMLA || isMLS) {
13619 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
13620 } else {
13621 assign(res, prod);
13623 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13624 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13625 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
13626 : (isMLS ? "mls" : "mul"),
13627 nameQReg128(dd), arr,
13628           nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13629 return True;
13632 if (opcode == BITS4(1,0,1,0)
13633 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
13634 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
13635 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
13636 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
13637 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
13638 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
13639       /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
13640 /* Widens, and size refers to the narrowed lanes. */
13641 UInt ks = 3;
13642 switch (opcode) {
13643 case BITS4(1,0,1,0): ks = 0; break;
13644 case BITS4(0,0,1,0): ks = 1; break;
13645 case BITS4(0,1,1,0): ks = 2; break;
13646 default: vassert(0);
13648 vassert(ks >= 0 && ks <= 2);
13649 Bool isU = bitU == 1;
13650 Bool is2 = bitQ == 1;
13651 UInt mm = 32; // invalid
13652 UInt ix = 16; // invalid
13653 switch (size) {
13654 case X00:
13655 return False; // h_b_b[] case is not allowed
13656 case X01:
13657 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13658 case X10:
13659 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13660 case X11:
13661 return False; // q_d_d[] case is not allowed
13662 default:
13663 vassert(0);
13665 vassert(mm < 32 && ix < 16);
13666 IRTemp vecN = newTempV128();
13667 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13668 IRTemp vecD = newTempV128();
13669 assign(vecN, getQReg128(nn));
13670 assign(vecD, getQReg128(dd));
13671 IRTemp res = IRTemp_INVALID;
13672 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
13673 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
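      // The "mas"[ks] argument above selects 'm', 'a' or 's', telling
      // math_MULL_ACC whether this is a plain mull, an accumulating mlal,
      // or a subtracting mlsl.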
13674 putQReg128(dd, mkexpr(res));
13675 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
13676 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13677 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13678 HChar ch = size == X01 ? 'h' : 's';
13679 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13680 isU ? 'u' : 's', nm, is2 ? "2" : "",
13681 nameQReg128(dd), arrWide,
13682           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13683 return True;
13686 if (bitU == 0
13687 && (opcode == BITS4(1,0,1,1)
13688 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
13689 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
13690 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
13691 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
13692 /* Widens, and size refers to the narrowed lanes. */
13693 UInt ks = 3;
13694 switch (opcode) {
13695 case BITS4(1,0,1,1): ks = 0; break;
13696 case BITS4(0,0,1,1): ks = 1; break;
13697 case BITS4(0,1,1,1): ks = 2; break;
13698 default: vassert(0);
13700 vassert(ks >= 0 && ks <= 2);
13701 Bool is2 = bitQ == 1;
13702 UInt mm = 32; // invalid
13703 UInt ix = 16; // invalid
13704 switch (size) {
13705 case X00:
13706 return False; // h_b_b[] case is not allowed
13707 case X01:
13708 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13709 case X10:
13710 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13711 case X11:
13712 return False; // q_d_d[] case is not allowed
13713 default:
13714 vassert(0);
13716 vassert(mm < 32 && ix < 16);
13717 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
13718 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
13719 newTempsV128_2(&vecN, &vecD);
13720 assign(vecN, getQReg128(nn));
13721 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13722 assign(vecD, getQReg128(dd));
13723 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
13724 is2, size, "mas"[ks],
13725 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13726 putQReg128(dd, mkexpr(res));
13727 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
13728 updateQCFLAGwithDifference(sat1q, sat1n);
13729 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
13730 updateQCFLAGwithDifference(sat2q, sat2n);
13732 const HChar* nm = ks == 0 ? "sqdmull"
13733 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
13734 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13735 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13736 HChar ch = size == X01 ? 'h' : 's';
13737 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13738 nm, is2 ? "2" : "",
13739 nameQReg128(dd), arrWide,
13740           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13741 return True;
13744 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
13745 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
13746 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
13747 UInt mm = 32; // invalid
13748 UInt ix = 16; // invalid
13749 switch (size) {
13750 case X00:
13751 return False; // b case is not allowed
13752 case X01:
13753 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13754 case X10:
13755 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13756 case X11:
13757 return False; // q case is not allowed
13758 default:
13759 vassert(0);
13761 vassert(mm < 32 && ix < 16);
13762 Bool isR = opcode == BITS4(1,1,0,1);
13763 IRTemp res, sat1q, sat1n, vN, vM;
13764 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
13765 vN = newTempV128();
13766 assign(vN, getQReg128(nn));
13767 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13768 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
13769 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13770 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13771 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
13772 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
13773 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13774 HChar ch = size == X01 ? 'h' : 's';
13775 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13776           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13777 return True;
13780 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
13781       /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
13782       /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
13783 UInt mm = 32; // invalid
13784 UInt ix = 16; // invalid
13785 switch (size) {
13786 case X00:
13787 return False; // b case is not allowed
13788 case X01: // h
13789 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13790 case X10: // s
13791 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13792 case X11:
13793 return False; // d case is not allowed
13794 default:
13795 vassert(0);
13797 vassert(mm < 32 && ix < 16);
13799 IRTemp res, res_nosat, vD, vN, vM;
13800 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
13801 newTempsV128_2(&vD, &vN);
13802 assign(vD, getQReg128(dd));
13803 assign(vN, getQReg128(nn));
13805 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13806 Bool isAdd = opcode == BITS4(1,1,0,1);
13807 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
13808 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13809 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
13810 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13812 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13813 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
13814 HChar ch = size == X01 ? 'h' : 's';
13815 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13816 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13817 return True;
13820 return False;
13821 # undef INSN
13825 static
13826 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
13828 /* 31 23 21 16 11 9 4
13829 0100 1110 size 10100 opcode 10 n d
13830 Decode fields are: size,opcode
13831 Size is always 00 in ARMv8, it appears.
13833 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13834 if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
13835 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13836 return False;
13838 UInt size = INSN(23,22);
13839 UInt opcode = INSN(16,12);
13840 UInt nn = INSN(9,5);
13841 UInt dd = INSN(4,0);
13843 if (size == BITS2(0,0)
13844 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
13845 /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
13846 /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
13847 Bool isD = opcode == BITS5(0,0,1,0,1);
13848 IRTemp op1 = newTemp(Ity_V128);
13849 IRTemp op2 = newTemp(Ity_V128);
13850 IRTemp xord = newTemp(Ity_V128);
13851 IRTemp res = newTemp(Ity_V128);
13852 void* helper = isD ? &arm64g_dirtyhelper_AESD
13853 : &arm64g_dirtyhelper_AESE;
13854 const HChar* hname = isD ? "arm64g_dirtyhelper_AESD"
13855 : "arm64g_dirtyhelper_AESE";
13856 assign(op1, getQReg128(dd));
13857 assign(op2, getQReg128(nn));
13858 assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
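      /* The 128-bit AES state is handed to the helper as two 64-bit halves,
         presumably because dirty-call arguments need to be word-sized; the
         vector result comes back via the IRExpr_VECRET() slot. */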
13859 IRDirty* di
13860 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13861 mkIRExprVec_3(
13862 IRExpr_VECRET(),
13863 unop(Iop_V128HIto64, mkexpr(xord)),
13864 unop(Iop_V128to64, mkexpr(xord)) ) );
13865 stmt(IRStmt_Dirty(di));
13866 putQReg128(dd, mkexpr(res));
13867 DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
13868 nameQReg128(dd), nameQReg128(nn));
13869 return True;
13872 if (size == BITS2(0,0)
13873 && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
13874 /* -------- 00,00110: AESMC Vd.16b, Vn.16b -------- */
13875 /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
13876 Bool isI = opcode == BITS5(0,0,1,1,1);
13877 IRTemp src = newTemp(Ity_V128);
13878 IRTemp res = newTemp(Ity_V128);
13879 void* helper = isI ? &arm64g_dirtyhelper_AESIMC
13880 : &arm64g_dirtyhelper_AESMC;
13881 const HChar* hname = isI ? "arm64g_dirtyhelper_AESIMC"
13882 : "arm64g_dirtyhelper_AESMC";
13883 assign(src, getQReg128(nn));
13884 IRDirty* di
13885 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13886 mkIRExprVec_3(
13887 IRExpr_VECRET(),
13888 unop(Iop_V128HIto64, mkexpr(src)),
13889 unop(Iop_V128to64, mkexpr(src)) ) );
13890 stmt(IRStmt_Dirty(di));
13891 putQReg128(dd, mkexpr(res));
13892 DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
13893 nameQReg128(dd), nameQReg128(nn));
13894 return True;
13897 return False;
13898 # undef INSN
13902 static
13903 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13905 /* 31 28 23 21 20 15 14 11 9 4
13906 0101 1110 sz 0 m 0 opc 00 n d
13907 Decode fields are: sz,opc
13909 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13910 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
13911 || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
13912 return False;
13914 UInt sz = INSN(23,22);
13915 UInt mm = INSN(20,16);
13916 UInt opc = INSN(14,12);
13917 UInt nn = INSN(9,5);
13918 UInt dd = INSN(4,0);
13919 if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
13920 /* -------- 00,000 SHA1C Qd, Sn, Vm.4S -------- */
13921 /* -------- 00,001 SHA1P Qd, Sn, Vm.4S -------- */
13922 /* -------- 00,010 SHA1M Qd, Sn, Vm.4S -------- */
13923 /* -------- 00,011 SHA1SU0 Vd.4S, Vn.4S, Vm.4S -------- */
13924 /* -------- 00,100 SHA256H Qd, Qn, Vm.4S -------- */
13925 /* -------- 00,101 SHA256H2 Qd, Qn, Vm.4S -------- */
13926 /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
13927 vassert(opc < 7);
13928 const HChar* inames[7]
13929 = { "sha1c", "sha1p", "sha1m", "sha1su0",
13930 "sha256h", "sha256h2", "sha256su1" };
13931 void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
13932 = { &arm64g_dirtyhelper_SHA1C, &arm64g_dirtyhelper_SHA1P,
13933 &arm64g_dirtyhelper_SHA1M, &arm64g_dirtyhelper_SHA1SU0,
13934 &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
13935 &arm64g_dirtyhelper_SHA256SU1 };
13936 const HChar* hnames[7]
13937 = { "arm64g_dirtyhelper_SHA1C", "arm64g_dirtyhelper_SHA1P",
13938 "arm64g_dirtyhelper_SHA1M", "arm64g_dirtyhelper_SHA1SU0",
13939 "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
13940 "arm64g_dirtyhelper_SHA256SU1" };
13941 IRTemp vD = newTemp(Ity_V128);
13942 IRTemp vN = newTemp(Ity_V128);
13943 IRTemp vM = newTemp(Ity_V128);
13944 IRTemp vDhi = newTemp(Ity_I64);
13945 IRTemp vDlo = newTemp(Ity_I64);
13946 IRTemp vNhiPre = newTemp(Ity_I64);
13947 IRTemp vNloPre = newTemp(Ity_I64);
13948 IRTemp vNhi = newTemp(Ity_I64);
13949 IRTemp vNlo = newTemp(Ity_I64);
13950 IRTemp vMhi = newTemp(Ity_I64);
13951 IRTemp vMlo = newTemp(Ity_I64);
13952 assign(vD, getQReg128(dd));
13953 assign(vN, getQReg128(nn));
13954 assign(vM, getQReg128(mm));
13955 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13956 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13957 assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
13958 assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
13959 assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
13960 assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
13961 /* Mask off any bits of the N register operand that aren't actually
13962 needed, so that Memcheck doesn't complain unnecessarily. */
13963 switch (opc) {
13964 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13965 assign(vNhi, mkU64(0));
13966 assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
13967 break;
13968 case BITS3(0,1,1): case BITS3(1,0,0):
13969 case BITS3(1,0,1): case BITS3(1,1,0):
13970 assign(vNhi, mkexpr(vNhiPre));
13971 assign(vNlo, mkexpr(vNloPre));
13972 break;
13973 default:
13974 vassert(0);
13976 IRTemp res = newTemp(Ity_V128);
13977 IRDirty* di
13978 = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
13979 mkIRExprVec_7(
13980 IRExpr_VECRET(),
13981 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
13982 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
13983 stmt(IRStmt_Dirty(di));
13984 putQReg128(dd, mkexpr(res));
13985 switch (opc) {
13986 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13987 DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
13988 break;
13989 case BITS3(0,1,1): case BITS3(1,1,0):
13990 DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
13991 break;
13992 case BITS3(1,0,0): case BITS3(1,0,1):
13993 DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
13994 break;
13995 default:
13996 vassert(0);
13998 return True;
14001 return False;
14002 # undef INSN
14006 static
14007 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
14009 /* 31 28 23 21 16 11 9 4
14010 0101 1110 sz 10100 opc 10 n d
14011 Decode fields are: sz,opc
14013 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14014 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
14015 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
14016 return False;
14018 UInt sz = INSN(23,22);
14019 UInt opc = INSN(16,12);
14020 UInt nn = INSN(9,5);
14021 UInt dd = INSN(4,0);
14022 if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
14023 /* -------- 00,00000 SHA1H Sd, Sn -------- */
14024 /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
14025 /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
14026 vassert(opc < 3);
14027 const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
14028 IRTemp vD = newTemp(Ity_V128);
14029 IRTemp vN = newTemp(Ity_V128);
14030 IRTemp vDhi = newTemp(Ity_I64);
14031 IRTemp vDlo = newTemp(Ity_I64);
14032 IRTemp vNhi = newTemp(Ity_I64);
14033 IRTemp vNlo = newTemp(Ity_I64);
14034 assign(vD, getQReg128(dd));
14035 assign(vN, getQReg128(nn));
14036 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
14037 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
14038 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
14039 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
14040 /* Mask off any bits of the N register operand that aren't actually
14041 needed, so that Memcheck doesn't complain unnecessarily. Also
14042 construct the calls, given that the helper functions don't take
14043 the same number of arguments. */
14044 IRDirty* di = NULL;
14045 IRTemp res = newTemp(Ity_V128);
14046 switch (opc) {
14047 case BITS5(0,0,0,0,0): {
14048 IRExpr* vNloMasked = unop(Iop_32Uto64,
14049 unop(Iop_64to32, mkexpr(vNlo)));
14050 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
14051 "arm64g_dirtyhelper_SHA1H",
14052 &arm64g_dirtyhelper_SHA1H,
14053 mkIRExprVec_3(
14054 IRExpr_VECRET(),
14055 mkU64(0), vNloMasked) );
14056 break;
14058 case BITS5(0,0,0,0,1):
14059 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
14060 "arm64g_dirtyhelper_SHA1SU1",
14061 &arm64g_dirtyhelper_SHA1SU1,
14062 mkIRExprVec_5(
14063 IRExpr_VECRET(),
14064 mkexpr(vDhi), mkexpr(vDlo),
14065 mkexpr(vNhi), mkexpr(vNlo)) );
14066 break;
14067 case BITS5(0,0,0,1,0):
14068 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
14069 "arm64g_dirtyhelper_SHA256SU0",
14070 &arm64g_dirtyhelper_SHA256SU0,
14071 mkIRExprVec_5(
14072 IRExpr_VECRET(),
14073 mkexpr(vDhi), mkexpr(vDlo),
14074 mkexpr(vNhi), mkexpr(vNlo)) );
14075 break;
14076 default:
14077 vassert(0);
14079 stmt(IRStmt_Dirty(di));
14080 putQReg128(dd, mkexpr(res));
14081 switch (opc) {
14082 case BITS5(0,0,0,0,0):
14083 DIP("%s s%u, s%u\n", inames[opc], dd, nn);
14084 break;
14085 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
14086 DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
14087 break;
14088 default:
14089 vassert(0);
14091 return True;
14094 return False;
14095 # undef INSN
14099 static
14100 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
14102 /* 31 28 23 21 20 15 13 9 4
14103 000 11110 ty 1 m op 1000 n opcode2
14104 The first 3 bits are really "M 0 S", but M and S are always zero.
14105 Decode fields are: ty,op,opcode2
14107 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14108 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14109 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
14110 return False;
14112 UInt ty = INSN(23,22);
14113 UInt mm = INSN(20,16);
14114 UInt op = INSN(15,14);
14115 UInt nn = INSN(9,5);
14116 UInt opcode2 = INSN(4,0);
14117 vassert(ty < 4);
14119 if (ty <= X01 && op == X00
14120 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
14121 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
14122 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
14123 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
14124 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
14125 /* 31 23 20 15 9 4
14126 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
14127 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
14128 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
14129 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
14131 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
14132 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
14133 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
14134 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
14136 FCMPE generates Invalid Operation exn if either arg is any kind
14137 of NaN. FCMP generates Invalid Operation exn if either arg is a
14138 signalling NaN. We ignore this detail here and produce the same
14139 IR for both.
14141 Bool isD = (ty & 1) == 1;
14142 Bool isCMPE = (opcode2 & 16) == 16;
14143 Bool cmpZero = (opcode2 & 8) == 8;
14144 IRType ity = isD ? Ity_F64 : Ity_F32;
14145 Bool valid = True;
14146 if (cmpZero && mm != 0) valid = False;
14147 if (valid) {
14148 IRTemp argL = newTemp(ity);
14149 IRTemp argR = newTemp(ity);
14150 IRTemp irRes = newTemp(Ity_I32);
14151 assign(argL, getQRegLO(nn, ity));
14152 assign(argR,
14153 cmpZero
14154 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
14155 : getQRegLO(mm, ity));
14156 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
14157 mkexpr(argL), mkexpr(argR)));
14158 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
14159 IRTemp nzcv_28x0 = newTemp(Ity_I64);
14160 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
14161 setFlags_COPY(nzcv_28x0);
14162 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
14163 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
14164 return True;
14166 return False;
14169 return False;
14170 # undef INSN
14174 static
14175 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
14177 /* 31 28 23 21 20 15 11 9 4 3
14178 000 11110 ty 1 m cond 01 n op nzcv
14179 The first 3 bits are really "M 0 S", but M and S are always zero.
14180 Decode fields are: ty,op
14182 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14183 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14184 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
14185 return False;
14187 UInt ty = INSN(23,22);
14188 UInt mm = INSN(20,16);
14189 UInt cond = INSN(15,12);
14190 UInt nn = INSN(9,5);
14191 UInt op = INSN(4,4);
14192 UInt nzcv = INSN(3,0);
14193 vassert(ty < 4 && op <= 1);
14195 if (ty <= BITS2(0,1)) {
14196 /* -------- 00,0 FCCMP s_s -------- */
14197 /* -------- 00,1 FCCMPE s_s -------- */
14198 /* -------- 01,0 FCCMP d_d -------- */
14199 /* -------- 01,1 FCCMPE d_d -------- */
14201 /* FCCMPE generates Invalid Operation exn if either arg is any kind
14202 of NaN. FCCMP generates Invalid Operation exn if either arg is a
14203 signalling NaN. We ignore this detail here and produce the same
14204 IR for both.
14206 Bool isD = (ty & 1) == 1;
14207 Bool isCMPE = op == 1;
14208 IRType ity = isD ? Ity_F64 : Ity_F32;
14209 IRTemp argL = newTemp(ity);
14210 IRTemp argR = newTemp(ity);
14211 IRTemp irRes = newTemp(Ity_I32);
14212 assign(argL, getQRegLO(nn, ity));
14213 assign(argR, getQRegLO(mm, ity));
14214 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
14215 mkexpr(argL), mkexpr(argR)));
14216 IRTemp condT = newTemp(Ity_I1);
14217 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
14218 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
14220 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
14221 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
14223 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
14225 IRTemp nzcv_28x0 = newTemp(Ity_I64);
14226 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
14227 mkexpr(nzcvT_28x0), nzcvF_28x0));
14228 setFlags_COPY(nzcv_28x0);
14229 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
14230 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
14231 return True;
14234 return False;
14235 # undef INSN
14239 static
14240 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
14242 /* 31 23 21 20 15 11 9 5
14243 000 11110 ty 1 m cond 11 n d
14244 The first 3 bits are really "M 0 S", but M and S are always zero.
14245 Decode fields: ty
14247 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14248 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
14249 || INSN(11,10) != BITS2(1,1)) {
14250 return False;
14252 UInt ty = INSN(23,22);
14253 UInt mm = INSN(20,16);
14254 UInt cond = INSN(15,12);
14255 UInt nn = INSN(9,5);
14256 UInt dd = INSN(4,0);
14257 if (ty <= X01) {
14258 /* -------- 00: FCSEL s_s -------- */
14259       /* -------- 01: FCSEL d_d -------- */
14260 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
14261 IRTemp srcT = newTemp(ity);
14262 IRTemp srcF = newTemp(ity);
14263 IRTemp res = newTemp(ity);
14264 assign(srcT, getQRegLO(nn, ity));
14265 assign(srcF, getQRegLO(mm, ity));
14266 assign(res, IRExpr_ITE(
14267 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
14268 mkexpr(srcT), mkexpr(srcF)));
14269 putQReg128(dd, mkV128(0x0000));
14270 putQRegLO(dd, mkexpr(res));
14271 DIP("fcsel %s, %s, %s, %s\n",
14272 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
14273 nameCC(cond));
14274 return True;
14276 return False;
14277 # undef INSN
14281 static
14282 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
14284 /* 31 28 23 21 20 14 9 4
14285 000 11110 ty 1 opcode 10000 n d
14286 The first 3 bits are really "M 0 S", but M and S are always zero.
14287 Decode fields: ty,opcode
14289 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14290 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14291 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
14292 return False;
14294 UInt ty = INSN(23,22);
14295 UInt opcode = INSN(20,15);
14296 UInt nn = INSN(9,5);
14297 UInt dd = INSN(4,0);
14299 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
14300 /* -------- 0x,000000: FMOV d_d, s_s -------- */
14301 /* -------- 0x,000001: FABS d_d, s_s -------- */
14302 /* -------- 0x,000010: FNEG d_d, s_s -------- */
14303 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
14304 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
14305 IRTemp src = newTemp(ity);
14306 IRTemp res = newTemp(ity);
14307 const HChar* nm = "??";
14308 assign(src, getQRegLO(nn, ity));
14309 switch (opcode) {
14310 case BITS6(0,0,0,0,0,0):
14311 nm = "fmov"; assign(res, mkexpr(src)); break;
14312 case BITS6(0,0,0,0,0,1):
14313 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
14314 case BITS6(0,0,0,0,1,0):
14315             nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
14316 case BITS6(0,0,0,0,1,1):
14317 nm = "fsqrt";
14318 assign(res, binop(mkSQRTF(ity),
14319 mkexpr(mk_get_IR_rounding_mode()),
14320 mkexpr(src))); break;
14321 default:
14322 vassert(0);
14324 putQReg128(dd, mkV128(0x0000));
14325 putQRegLO(dd, mkexpr(res));
14326 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
14327 return True;
14330 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
14331 || opcode == BITS6(0,0,0,1,0,1)))
14332 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
14333 || opcode == BITS6(0,0,0,1,0,1)))
14334 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
14335 || opcode == BITS6(0,0,0,1,0,0)))) {
14336 /* -------- 11,000100: FCVT s_h -------- */
14337 /* -------- 11,000101: FCVT d_h -------- */
14338 /* -------- 00,000111: FCVT h_s -------- */
14339 /* -------- 00,000101: FCVT d_s -------- */
14340 /* -------- 01,000111: FCVT h_d -------- */
14341 /* -------- 01,000100: FCVT s_d -------- */
14342 /* 31 23 21 16 14 9 4
14343 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
14344 --------- 11 ----- 01 --------- FCVT Dd, Hn
14345 --------- 00 ----- 11 --------- FCVT Hd, Sn
14346 --------- 00 ----- 01 --------- FCVT Dd, Sn
14347 --------- 01 ----- 11 --------- FCVT Hd, Dn
14348 --------- 01 ----- 00 --------- FCVT Sd, Dn
14349 Rounding, when dst is smaller than src, is per the FPCR.
14351 UInt b2322 = ty;
14352 UInt b1615 = opcode & BITS2(1,1);
14353 switch ((b2322 << 2) | b1615) {
14354 case BITS4(0,0,0,1): // S -> D
14355 case BITS4(1,1,0,1): { // H -> D
14356 Bool srcIsH = b2322 == BITS2(1,1);
14357 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
14358 IRTemp res = newTemp(Ity_F64);
14359 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
14360 getQRegLO(nn, srcTy)));
14361 putQReg128(dd, mkV128(0x0000));
14362 putQRegLO(dd, mkexpr(res));
14363 DIP("fcvt %s, %s\n",
14364 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
14365 return True;
14367 case BITS4(0,1,0,0): // D -> S
14368 case BITS4(0,1,1,1): { // D -> H
14369 Bool dstIsH = b1615 == BITS2(1,1);
14370 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
14371 IRTemp res = newTemp(dstTy);
14372 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
14373 mkexpr(mk_get_IR_rounding_mode()),
14374 getQRegLO(nn, Ity_F64)));
14375 putQReg128(dd, mkV128(0x0000));
14376 putQRegLO(dd, mkexpr(res));
14377 DIP("fcvt %s, %s\n",
14378 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
14379 return True;
14381 case BITS4(0,0,1,1): // S -> H
14382 case BITS4(1,1,0,0): { // H -> S
14383 Bool toH = b1615 == BITS2(1,1);
14384 IRType srcTy = toH ? Ity_F32 : Ity_F16;
14385 IRType dstTy = toH ? Ity_F16 : Ity_F32;
14386 IRTemp res = newTemp(dstTy);
14387 if (toH) {
14388 assign(res, binop(Iop_F32toF16,
14389 mkexpr(mk_get_IR_rounding_mode()),
14390 getQRegLO(nn, srcTy)));
14392 } else {
14393 assign(res, unop(Iop_F16toF32,
14394 getQRegLO(nn, srcTy)));
14396 putQReg128(dd, mkV128(0x0000));
14397 putQRegLO(dd, mkexpr(res));
14398 DIP("fcvt %s, %s\n",
14399 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
14400 return True;
14402 default:
14403 break;
14405 /* else unhandled */
14406 return False;
14409 if (ty <= X01
14410 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
14411 && opcode != BITS6(0,0,1,1,0,1)) {
14412 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
14413 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
14414 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
14415 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
14416 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
14417 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
14418 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
14419 /* 31 23 21 17 14 9 4
14420 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
14422 x==0 => S-registers, x==1 => D-registers
14423 rm (17:15) encodings:
14424 111 per FPCR (FRINTI)
14425 001 +inf (FRINTP)
14426 010 -inf (FRINTM)
14427 011 zero (FRINTZ)
14428 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
14429 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
14430 110 per FPCR + "exact = TRUE" (FRINTX)
14431 101 unallocated
14433 Bool isD = (ty & 1) == 1;
14434 UInt rm = opcode & BITS6(0,0,0,1,1,1);
14435 IRType ity = isD ? Ity_F64 : Ity_F32;
14436 IRExpr* irrmE = NULL;
14437 UChar ch = '?';
14438 switch (rm) {
14439 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
14440 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
14441 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
14442 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
14443 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
14444 // I am unsure about the following, due to the "integral exact"
14445 // description in the manual. What does it mean? (frintx, that is)
14446 case BITS3(1,1,0):
14447 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
14448 case BITS3(1,1,1):
14449 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
14450 // The following is a kludge. There's no Irrm_ value to represent
14451 // this ("to nearest, with ties to even")
14452 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
14453 default: break;
14455 if (irrmE) {
14456 IRTemp src = newTemp(ity);
14457 IRTemp dst = newTemp(ity);
14458 assign(src, getQRegLO(nn, ity));
14459 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
14460 irrmE, mkexpr(src)));
14461 putQReg128(dd, mkV128(0x0000));
14462 putQRegLO(dd, mkexpr(dst));
14463 DIP("frint%c %s, %s\n",
14464 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
14465 return True;
14467 return False;
14470 return False;
14471 # undef INSN
14475 static
14476 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
14478 /* 31 28 23 21 20 15 11 9 4
14479 000 11110 ty 1 m opcode 10 n d
14480 The first 3 bits are really "M 0 S", but M and S are always zero.
14481 Decode fields: ty, opcode
14483 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14484 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14485 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
14486 return False;
14488 UInt ty = INSN(23,22);
14489 UInt mm = INSN(20,16);
14490 UInt opcode = INSN(15,12);
14491 UInt nn = INSN(9,5);
14492 UInt dd = INSN(4,0);
14494 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
14495 /* ------- 0x,0000: FMUL d_d, s_s ------- */
14496 /* ------- 0x,0001: FDIV d_d, s_s ------- */
14497 /* ------- 0x,0010: FADD d_d, s_s ------- */
14498 /* ------- 0x,0011: FSUB d_d, s_s ------- */
14499 /* ------- 0x,0100: FMAX d_d, s_s ------- */
14500 /* ------- 0x,0101: FMIN d_d, s_s ------- */
14501 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
14502 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
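      /* (What the kludge loses: FMAXNM/FMINNM return the numeric operand when
         exactly one operand is a quiet NaN, whereas FMAX/FMIN propagate the
         NaN.) */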
14503 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
14504 IROp iop = Iop_INVALID;
14505 const HChar* nm = "???";
14506 switch (opcode) {
14507 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
14508 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
14509 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
14510 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
14511 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
14512 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
14513 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
14514 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
14515 default: vassert(0);
14517 if (opcode <= BITS4(0,0,1,1)) {
14518 // This is really not good code. TODO: avoid width-changing
14519 IRTemp res = newTemp(ity);
14520 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
14521 getQRegLO(nn, ity), getQRegLO(mm, ity)));
14522 putQReg128(dd, mkV128(0));
14523 putQRegLO(dd, mkexpr(res));
14524 } else {
14525 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
14526 binop(iop, getQReg128(nn), getQReg128(mm))));
14528 DIP("%s %s, %s, %s\n",
14529 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
14530 return True;
14533 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
14534 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
14535 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
14536 IROp iop = mkMULF(ity);
14537 IROp iopn = mkNEGF(ity);
14538 const HChar* nm = "fnmul";
14539 IRExpr* resE = unop(iopn,
14540 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
14541 getQRegLO(nn, ity), getQRegLO(mm, ity)));
14542 IRTemp res = newTemp(ity);
14543 assign(res, resE);
14544 putQReg128(dd, mkV128(0));
14545 putQRegLO(dd, mkexpr(res));
14546 DIP("%s %s, %s, %s\n",
14547 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
14548 return True;
14551 return False;
14552 # undef INSN
14556 static
14557 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
14559 /* 31 28 23 21 20 15 14 9 4
14560 000 11111 ty o1 m o0 a n d
14561 The first 3 bits are really "M 0 S", but M and S are always zero.
14562 Decode fields: ty,o1,o0
14564 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14565 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
14566 return False;
14568 UInt ty = INSN(23,22);
14569 UInt bitO1 = INSN(21,21);
14570 UInt mm = INSN(20,16);
14571 UInt bitO0 = INSN(15,15);
14572 UInt aa = INSN(14,10);
14573 UInt nn = INSN(9,5);
14574 UInt dd = INSN(4,0);
14575 vassert(ty < 4);
14577 if (ty <= X01) {
14578 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
14579 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
14580 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
14581 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
14582 /* -------------------- F{N}M{ADD,SUB} -------------------- */
14583 /* 31 22 20 15 14 9 4 ix
14584 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
14585 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
14586 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
14587 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
14588 where Fx=Dx when sz=1, Fx=Sx when sz=0
14590 -----SPEC------ ----IMPL----
14591 fmadd a + n * m fmadd (a, n, m)
14592 fmsub a + (-n) * m fmsub (a, n, m)
14593 fnmadd (-a) + (-n) * m fmadd (-a, -n, m)
14594 fnmsub (-a) + n * m fmadd (-a, n, m)
14596 Note Iop_MAdd/SubF32/64 take arguments in the order: rm, N, M, A
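        As a quick sanity check of the mapping: FNMADD with a=1, n=2, m=3
        should give (-1) + (-2)*3 = -7; the IR below computes
        MAddF(rm, -2, 3, -1) = (-2)*3 + (-1) = -7, as required.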
14598 Bool isD = (ty & 1) == 1;
14599 UInt ix = (bitO1 << 1) | bitO0;
14600 IRType ity = isD ? Ity_F64 : Ity_F32;
14601 IROp opFMADD = mkFMADDF(ity);
14602 IROp opFMSUB = mkFMSUBF(ity);
14603 IROp opNEG = mkNEGF(ity);
14604 IRTemp res = newTemp(ity);
14605 IRExpr* eA = getQRegLO(aa, ity);
14606 IRExpr* eN = getQRegLO(nn, ity);
14607 IRExpr* eM = getQRegLO(mm, ity);
14608 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
14609 switch (ix) {
14610 case 0: /* FMADD */
14611 assign(res, qop(opFMADD, rm, eN, eM, eA));
14612 break;
14613 case 1: /* FMSUB */
14614 assign(res, qop(opFMSUB, rm, eN, eM, eA));
14615 break;
14616 case 2: /* FNMADD */
14617 assign(res, qop(opFMADD, rm, unop(opNEG, eN), eM,
14618 unop(opNEG,eA)));
14619 break;
14620 case 3: /* FNMSUB */
14621 assign(res, qop(opFMADD, rm, eN, eM, unop(opNEG, eA)));
14622 break;
14623 default:
14624 vassert(0);
14626 putQReg128(dd, mkV128(0x0000));
14627 putQRegLO(dd, mkexpr(res));
14628 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
14629 DIP("%s %s, %s, %s, %s\n",
14630 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
14631 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
14632 return True;
14635 return False;
14636 # undef INSN
14640 static
14641 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
14643 /* 31 28 23 21 20 12 9 4
14644 000 11110 ty 1 imm8 100 imm5 d
14645 The first 3 bits are really "M 0 S", but M and S are always zero.
14647 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14648 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14649 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
14650 return False;
14652 UInt ty = INSN(23,22);
14653 UInt imm8 = INSN(20,13);
14654 UInt imm5 = INSN(9,5);
14655 UInt dd = INSN(4,0);
14657 /* ------- 00,00000: FMOV s_imm ------- */
14658 /* ------- 01,00000: FMOV d_imm ------- */
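   /* (VFPExpandImm decodes imm8 as, roughly, sign : 3-bit exponent : 4-bit
      fraction, yielding +/- n/16 * 2^r for n in 16..31 and r in -3..4, i.e.
      magnitudes from 0.125 to 31.0.  For example imm8 == 0x70 expands to
      1.0.) */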
14659 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
14660 Bool isD = (ty & 1) == 1;
14661 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
14662 if (!isD) {
14663 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
14665 putQReg128(dd, mkV128(0));
14666 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
14667 DIP("fmov %s, #0x%llx\n",
14668 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
14669 return True;
14672 return False;
14673 # undef INSN
14677 static
14678 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14681 /* 31 30 29 28 23 21 20 18 15 9 4
14682 sf 0 0 11110 type 0 rmode opcode scale n d
14683 The first 3 bits are really "sf 0 S", but S is always zero.
14684 Decode fields: sf,type,rmode,opcode
14686 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14687 if (INSN(30,29) != BITS2(0,0)
14688 || INSN(28,24) != BITS5(1,1,1,1,0)
14689 || INSN(21,21) != 0) {
14690 return False;
14692 UInt bitSF = INSN(31,31);
14693 UInt ty = INSN(23,22); // type
14694 UInt rm = INSN(20,19); // rmode
14695 UInt op = INSN(18,16); // opcode
14696 UInt sc = INSN(15,10); // scale
14697 UInt nn = INSN(9,5);
14698 UInt dd = INSN(4,0);
14700 if (ty <= X01 && rm == X11
14701 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
14702 /* -------- (ix) sf ty rm opc -------- */
14703 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
14704 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
14705 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
14706 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
14708 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
14709 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
14710 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
14711 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
14712 Bool isI64 = bitSF == 1;
14713 Bool isF64 = (ty & 1) == 1;
14714 Bool isU = (op & 1) == 1;
14715 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14717 Int fbits = 64 - sc;
14718 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14720 Double scale = two_to_the_plus(fbits);
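      // e.g. FCVTZS Wd, Sn, #8: scale = 2^8, so Sn = 3.5 becomes 3.5 * 256
      // = 896.0, which the round-to-zero conversion below turns into Wd = 896.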
14721 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14722 : IRExpr_Const(IRConst_F32( (Float)scale ));
14723 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14725 const IROp ops[8]
14726 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
14727 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
14728 IRTemp irrm = newTemp(Ity_I32);
14729 assign(irrm, mkU32(Irrm_ZERO));
14731 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
14732 IRExpr* res = binop(ops[ix], mkexpr(irrm),
14733 triop(opMUL, mkexpr(irrm), src, scaleE));
14734 putIRegOrZR(isI64, dd, res);
14736 DIP("fcvtz%c %s, %s, #%d\n",
14737 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
14738 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
14739 return True;
14742 /* ------ sf,ty,rm,opc ------ */
14743 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
14744 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
14745 /* (ix) sf S 28 ty rm opc 15 9 4
14746 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
14747 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
14748 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
14749 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
14751 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
14752 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
14753 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
14754 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
14756       These are signed/unsigned conversions from integer registers to
14757 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
14758 scaled per |scale|.
14760 if (ty <= X01 && rm == X00
14761 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
14762 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
14763 Bool isI64 = bitSF == 1;
14764 Bool isF64 = (ty & 1) == 1;
14765 Bool isU = (op & 1) == 1;
14766 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14768 Int fbits = 64 - sc;
14769 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14771 Double scale = two_to_the_minus(fbits);
14772 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14773 : IRExpr_Const(IRConst_F32( (Float)scale ));
14774 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14776 const IROp ops[8]
14777 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14778 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14779 IRExpr* src = getIRegOrZR(isI64, nn);
14780 IRExpr* res = (isF64 && !isI64)
14781 ? unop(ops[ix], src)
14782 : binop(ops[ix],
14783 mkexpr(mk_get_IR_rounding_mode()), src);
14784 putQReg128(dd, mkV128(0));
14785 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
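      // (The Irrm_NEAREST for this scale multiply is mostly immaterial:
      // multiplying by an exact power of two is exact except on
      // underflow/overflow.)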
14787 DIP("%ccvtf %s, %s, #%d\n",
14788 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14789 nameIRegOrZR(isI64, nn), fbits);
14790 return True;
14793 return False;
14794 # undef INSN
14798 static
14799 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14801 /* 31 30 29 28 23 21 20 18 15 9 4
14802 sf 0 0 11110 type 1 rmode opcode 000000 n d
14803 The first 3 bits are really "sf 0 S", but S is always zero.
14804 Decode fields: sf,type,rmode,opcode
14806 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14807 if (INSN(30,29) != BITS2(0,0)
14808 || INSN(28,24) != BITS5(1,1,1,1,0)
14809 || INSN(21,21) != 1
14810 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
14811 return False;
14813 UInt bitSF = INSN(31,31);
14814 UInt ty = INSN(23,22); // type
14815 UInt rm = INSN(20,19); // rmode
14816 UInt op = INSN(18,16); // opcode
14817 UInt nn = INSN(9,5);
14818 UInt dd = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
   /*    30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)
      ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
      ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01
       && (   ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
           || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
          )
      ) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
         switch (rm) {
            case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
            case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
            case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
            case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
            default: vassert(0);
         }
      } else {
         vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
         switch (rm) {
            case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
            default: vassert(0);
         }
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
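      /* Worked example (illustrative): FCVTMS Xd, Sn has sf == 1,
         ty == 00 and op == 000, so isF64 == 0, isI64 == 1, isU == 0,
         giving ix == 2 and hence Iop_F32toI64S below, with
         irrm == Irrm_NegINF chosen above. */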
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
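
   /* Illustrative note on the rounding-mode choices above: for an
      input of -1.7, FCVTZS rounds toward zero and produces -1, while
      FCVTMS rounds toward minus infinity and produces -2.  The
      concrete values are examples only. */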

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty rm op  15     9 4
      0    0  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }
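
   /* Illustrative note: these are value conversions, not bit moves.
      With Wn holding 0xFFFFFFFF, UCVTF Dd, Wn yields 4294967295.0
      while SCVTF Dd, Wn yields -1.0.  The concrete input is an
      example only. */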

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf  S       ty   rm  op  15     9 4
       (1) 0  0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1  0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1  0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0  0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1  0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1  0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}
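
/* Illustrative note on the FMOV (general) cases above: FMOV is a raw
   bit transfer, not a value conversion.  For example, FMOV Dd, Xn with
   Xn holding 0x3FF0000000000000 makes Dd read back as 1.0 when
   interpreted as an F64.  The concrete bit pattern is an example
   only. */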

static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn,
                           const VexArchInfo* archinfo)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn, archinfo);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same_extra(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same_fp16(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}
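
/* A minimal sketch of an alternative, table-driven form of the chain
   above, assuming all sub-decoders shared the (DisResult*, UInt)
   signature (dis_AdvSIMD_scalar_pairwise also needs archinfo, so it
   would require special-casing):

      typedef Bool (*AdvSIMDDecoder)(DisResult*, UInt);
      static const AdvSIMDDecoder decoders[]
         = { dis_AdvSIMD_EXT, dis_AdvSIMD_TBL_TBX, ... };
      UInt i;
      for (i = 0; i < sizeof(decoders)/sizeof(decoders[0]); i++)
         if (decoders[i](dres, insn)) return True;
      return False;

   This is illustrative only; the unrolled sequence above is what the
   file actually uses. */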

/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult*    dres,
        const UChar*            guest_instr,
        const VexArchInfo*      archinfo,
        const VexAbiInfo*       abiinfo,
        Bool                    sigill_diag
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool     allow_VFP = False;
//ZZ    //UInt     hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->jk_StopHere = Ijk_INVALID;
   dres->hint        = Dis_HintNone;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }
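
   /* Illustrative note: a complete client_request special sequence is
      therefore 20 bytes long -- the five little-endian words

         93CC0D8C 93CC358C 93CCCD8C 93CCF58C AA0A014A

      starting at the current guest PC, which is why the handlers above
      advance the PC (or set dres->len) by 20 for these cases. */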

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn, sigill_diag);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo, sigill_diag);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn, abiinfo, sigill_diag);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn, sigill_diag);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn, archinfo);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }
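
   /* Worked example (illustrative): for insn == 0x9E670020, which per
      the FMOV (general) table earlier in this file encodes
      FMOV D0, X1, INSN(28,25) is BITS4(1,1,1,1), so decoding is handed
      to dis_ARM64_simd_and_fp.  The concrete opcode value is an
      assumption derived from that table, not something this file
      states. */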

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*                irsb_IN,
                           const UChar*         guest_code_IN,
                           Long                 delta_IN,
                           Addr                 guest_IP,
                           VexArch              guest_arch,
                           const VexArchInfo*   archinfo,
                           const VexAbiInfo*    abiinfo,
                           VexEndness           host_endness_IN,
                           Bool                 sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo, sigill_diag_IN );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
                  = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }
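
      /* The loop above renders the opcode MSB-first in groups of four
         bits; illustratively, 0x9E670020 would print as
         "1001'1110 0110'0111 0000'0000 0010'0000". */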

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                   guest_arm64_toIR.c    ---*/
/*--------------------------------------------------------------------*/