1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2013-2017 OpenWorks
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 /* KNOWN LIMITATIONS 2014-Nov-16
32 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
34 Also FP comparison "unordered" .. is implemented as normal FP
35 comparison.
37 Both should be fixed. They behave incorrectly in the presence of
38 NaNs.
40 FMULX is treated the same as FMUL. That's also not correct.
42    * Floating multiply-add (etc) insns are split into a multiply and
43 an add, and so suffer double rounding and hence sometimes the
44 least significant mantissa bit is incorrect. Fix: use the IR
45 multiply-add IROps instead.
47 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
48 handling for the "ties" case. FRINTX might be dubious too.
50    * Ditto FCVTXN.  Its "round to odd" mode (the result's LSB is forced
51      to 1 when the result is inexact) is not honoured; this implementation
      just rounds to nearest.
54 /* "Special" instructions.
56 This instruction decoder can decode four special instructions
57 which mean nothing natively (are no-ops as far as regs/mem are
58 concerned) but have meaning for supporting Valgrind. A special
59 instruction is flagged by a 16-byte preamble:
61 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
62 (ror x12, x12, #3; ror x12, x12, #13
63 ror x12, x12, #51; ror x12, x12, #61)
65    Following that, exactly one of the following 4 is allowed
66 (standard interpretation in parentheses):
68 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
69 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
70 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
71 AA090129 (orr x9,x9,x9) IR injection
73 Any other bytes following the 16-byte preamble are illegal and
74 constitute a failure in instruction decoding. This all assumes
75 that the preamble will never occur except in specific code
76 fragments designed for Valgrind to catch.
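/* Editorial worked example (not in the original source): a complete
   client-request sequence is therefore the five instruction words

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C   (the 16-byte preamble)
      AA0A014A                              (orr x10,x10,x10)

   which the decoder below interprets as "X3 = client_request ( X4 )".
   Any other word following the preamble causes a decode failure, as
   noted above. */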
79 /* Translates ARM64 code to IR. */
81 #include "libvex_basictypes.h"
82 #include "libvex_ir.h"
83 #include "libvex.h"
84 #include "libvex_guest_arm64.h"
86 #include "main_util.h"
87 #include "main_globals.h"
88 #include "guest_generic_bb_to_IR.h"
89 #include "guest_arm64_defs.h"
92 /*------------------------------------------------------------*/
93 /*--- Globals ---*/
94 /*------------------------------------------------------------*/
96 /* These are set at the start of the translation of an instruction, so
97 that we don't have to pass them around endlessly. CONST means does
98 not change during translation of the instruction.
101 /* CONST: what is the host's endianness? We need to know this in
102 order to do sub-register accesses to the SIMD/FP registers
103 correctly. */
104 static VexEndness host_endness;
106 /* CONST: The guest address for the instruction currently being
107 translated. */
108 static Addr64 guest_PC_curr_instr;
110 /* MOD: The IRSB* into which we're generating code. */
111 static IRSB* irsb;
114 /*------------------------------------------------------------*/
115 /*--- Debugging output ---*/
116 /*------------------------------------------------------------*/
118 #define DIP(format, args...) \
119 if (vex_traceflags & VEX_TRACE_FE) \
120 vex_printf(format, ## args)
122 #define DIS(buf, format, args...) \
123 if (vex_traceflags & VEX_TRACE_FE) \
124 vex_sprintf(buf, format, ## args)
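/* Illustrative use (editorial note): instruction decoders call these
   printf-style, gated on the front-end trace flag, e.g.

      DIP("add %s, %s, %s\n", nameIReg64orZR(dd),
                              nameIReg64orZR(nn), nameIReg64orZR(mm));

   The operands shown here are hypothetical; DIP only prints when
   VEX_TRACE_FE is set in vex_traceflags. */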
127 /*------------------------------------------------------------*/
128 /*--- Helper bits and pieces for deconstructing the ---*/
129 /*--- arm insn stream. ---*/
130 /*------------------------------------------------------------*/
132 /* Do a little-endian load of a 32-bit word, regardless of the
133 endianness of the underlying host. */
134 static inline UInt getUIntLittleEndianly ( const UChar* p )
136 UInt w = 0;
137 w = (w << 8) | p[3];
138 w = (w << 8) | p[2];
139 w = (w << 8) | p[1];
140 w = (w << 8) | p[0];
141 return w;
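/* Editorial example: for the byte sequence p[0..3] = { 0xCD, 0xAB, 0x34,
   0x12 } this returns 0x1234ABCD, irrespective of host endianness. */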
144 /* Sign extend an N-bit value up to 64 bits, by copying
145 bit N-1 into all higher positions. */
146 static ULong sx_to_64 ( ULong x, UInt n )
148 vassert(n > 1 && n < 64);
149 x <<= (64-n);
150 Long r = (Long)x;
151 r >>= (64-n);
152 return (ULong)r;
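/* Editorial examples: sx_to_64(0x1FF, 9) == 0xFFFFFFFFFFFFFFFFULL, since
   bit 8 of the 9-bit value is set and is copied into bits 63:9, whereas
   sx_to_64(0x0FF, 9) == 0xFFULL. */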
155 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
156 //ZZ endianness of the underlying host. */
157 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
158 //ZZ {
159 //ZZ UShort w = 0;
160 //ZZ w = (w << 8) | p[1];
161 //ZZ w = (w << 8) | p[0];
162 //ZZ return w;
163 //ZZ }
164 //ZZ
165 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
166 //ZZ vassert(sh >= 0 && sh < 32);
167 //ZZ if (sh == 0)
168 //ZZ return x;
169 //ZZ else
170 //ZZ return (x << (32-sh)) | (x >> sh);
171 //ZZ }
172 //ZZ
173 //ZZ static Int popcount32 ( UInt x )
174 //ZZ {
175 //ZZ Int res = 0, i;
176 //ZZ for (i = 0; i < 32; i++) {
177 //ZZ res += (x & 1);
178 //ZZ x >>= 1;
179 //ZZ }
180 //ZZ return res;
181 //ZZ }
182 //ZZ
183 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
184 //ZZ {
185 //ZZ UInt mask = 1 << ix;
186 //ZZ x &= ~mask;
187 //ZZ x |= ((b << ix) & mask);
188 //ZZ return x;
189 //ZZ }
191 #define BITS2(_b1,_b0) \
192 (((_b1) << 1) | (_b0))
194 #define BITS3(_b2,_b1,_b0) \
195 (((_b2) << 2) | ((_b1) << 1) | (_b0))
197 #define BITS4(_b3,_b2,_b1,_b0) \
198 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
200 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
201 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
202 | BITS4((_b3),(_b2),(_b1),(_b0)))
204 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
205 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
206 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
207 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
208 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
209 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
211 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
212 (((_b8) << 8) \
213 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
215 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
216 (((_b9) << 9) | ((_b8) << 8) \
217 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
219 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
220 (((_b10) << 10) \
221 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
223 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
224 (((_b11) << 11) \
225 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
227 #define X00 BITS2(0,0)
228 #define X01 BITS2(0,1)
229 #define X10 BITS2(1,0)
230 #define X11 BITS2(1,1)
232 // produces _uint[_bMax:_bMin]
233 #define SLICE_UInt(_uint,_bMax,_bMin) \
234 (( ((UInt)(_uint)) >> (_bMin)) \
235 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
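// Editorial examples: BITS4(1,0,1,1) == 0xB, and for a 32-bit instruction
// word SLICE_UInt(insn,31,24) yields its top byte; for instance
// SLICE_UInt(0x1234ABCD,15,8) == 0xAB.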
238 /*------------------------------------------------------------*/
239 /*--- Helper bits and pieces for creating IR fragments. ---*/
240 /*------------------------------------------------------------*/
242 static IRExpr* mkV128 ( UShort w )
244 return IRExpr_Const(IRConst_V128(w));
247 static IRExpr* mkU64 ( ULong i )
249 return IRExpr_Const(IRConst_U64(i));
252 static IRExpr* mkU32 ( UInt i )
254 return IRExpr_Const(IRConst_U32(i));
257 static IRExpr* mkU16 ( UInt i )
259 vassert(i < 65536);
260 return IRExpr_Const(IRConst_U16(i));
263 static IRExpr* mkU8 ( UInt i )
265 vassert(i < 256);
266 return IRExpr_Const(IRConst_U8( (UChar)i ));
269 static IRExpr* mkexpr ( IRTemp tmp )
271 return IRExpr_RdTmp(tmp);
274 static IRExpr* unop ( IROp op, IRExpr* a )
276 return IRExpr_Unop(op, a);
279 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
281 return IRExpr_Binop(op, a1, a2);
284 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
286 return IRExpr_Triop(op, a1, a2, a3);
289 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
291 return IRExpr_Load(Iend_LE, ty, addr);
294 /* Add a statement to the list held by "irsb". */
295 static void stmt ( IRStmt* st )
297 addStmtToIRSB( irsb, st );
300 static void assign ( IRTemp dst, IRExpr* e )
302 stmt( IRStmt_WrTmp(dst, e) );
305 static void storeLE ( IRExpr* addr, IRExpr* data )
307 stmt( IRStmt_Store(Iend_LE, addr, data) );
310 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
311 //ZZ {
312 //ZZ if (guardT == IRTemp_INVALID) {
313 //ZZ /* unconditional */
314 //ZZ storeLE(addr, data);
315 //ZZ } else {
316 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
317 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
318 //ZZ }
319 //ZZ }
320 //ZZ
321 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
322 //ZZ IRExpr* addr, IRExpr* alt,
323 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
324 //ZZ {
325 //ZZ if (guardT == IRTemp_INVALID) {
326 //ZZ /* unconditional */
327 //ZZ IRExpr* loaded = NULL;
328 //ZZ switch (cvt) {
329 //ZZ case ILGop_Ident32:
330 //ZZ loaded = loadLE(Ity_I32, addr); break;
331 //ZZ case ILGop_8Uto32:
332 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
333 //ZZ case ILGop_8Sto32:
334 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
335 //ZZ case ILGop_16Uto32:
336 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
337 //ZZ case ILGop_16Sto32:
338 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
339 //ZZ default:
340 //ZZ vassert(0);
341 //ZZ }
342 //ZZ vassert(loaded != NULL);
343 //ZZ assign(dst, loaded);
344 //ZZ } else {
345 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
346 //ZZ loaded data before putting the data in 'dst'. If the load
347 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
348 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
349 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
350 //ZZ }
351 //ZZ }
353 /* Generate a new temporary of the given type. */
354 static IRTemp newTemp ( IRType ty )
356 vassert(isPlausibleIRType(ty));
357 return newIRTemp( irsb->tyenv, ty );
360 /* This is used in many places, so the brevity is an advantage. */
361 static IRTemp newTempV128(void)
363 return newTemp(Ity_V128);
366 /* Initialise V128 temporaries en masse. */
367 static
368 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
370 vassert(t1 && *t1 == IRTemp_INVALID);
371 vassert(t2 && *t2 == IRTemp_INVALID);
372 *t1 = newTempV128();
373 *t2 = newTempV128();
376 static
377 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
379 vassert(t1 && *t1 == IRTemp_INVALID);
380 vassert(t2 && *t2 == IRTemp_INVALID);
381 vassert(t3 && *t3 == IRTemp_INVALID);
382 *t1 = newTempV128();
383 *t2 = newTempV128();
384 *t3 = newTempV128();
387 static
388 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
390 vassert(t1 && *t1 == IRTemp_INVALID);
391 vassert(t2 && *t2 == IRTemp_INVALID);
392 vassert(t3 && *t3 == IRTemp_INVALID);
393 vassert(t4 && *t4 == IRTemp_INVALID);
394 *t1 = newTempV128();
395 *t2 = newTempV128();
396 *t3 = newTempV128();
397 *t4 = newTempV128();
400 static
401 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
402 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
404 vassert(t1 && *t1 == IRTemp_INVALID);
405 vassert(t2 && *t2 == IRTemp_INVALID);
406 vassert(t3 && *t3 == IRTemp_INVALID);
407 vassert(t4 && *t4 == IRTemp_INVALID);
408 vassert(t5 && *t5 == IRTemp_INVALID);
409 vassert(t6 && *t6 == IRTemp_INVALID);
410 vassert(t7 && *t7 == IRTemp_INVALID);
411 *t1 = newTempV128();
412 *t2 = newTempV128();
413 *t3 = newTempV128();
414 *t4 = newTempV128();
415 *t5 = newTempV128();
416 *t6 = newTempV128();
417 *t7 = newTempV128();
420 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
421 //ZZ IRRoundingMode. */
422 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
423 //ZZ {
424 //ZZ return mkU32(Irrm_NEAREST);
425 //ZZ }
426 //ZZ
427 //ZZ /* Generate an expression for SRC rotated right by ROT. */
428 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
429 //ZZ {
430 //ZZ vassert(rot >= 0 && rot < 32);
431 //ZZ if (rot == 0)
432 //ZZ return mkexpr(src);
433 //ZZ return
434 //ZZ binop(Iop_Or32,
435 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
436 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
437 //ZZ }
438 //ZZ
439 //ZZ static IRExpr* mkU128 ( ULong i )
440 //ZZ {
441 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
442 //ZZ }
443 //ZZ
444 //ZZ /* Generate a 4-aligned version of the given expression if
445 //ZZ the given condition is true. Else return it unchanged. */
446 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
447 //ZZ {
448 //ZZ if (b)
449 //ZZ return binop(Iop_And32, e, mkU32(~3));
450 //ZZ else
451 //ZZ return e;
452 //ZZ }
454 /* Other IR construction helpers. */
455 static IROp mkAND ( IRType ty ) {
456 switch (ty) {
457 case Ity_I32: return Iop_And32;
458 case Ity_I64: return Iop_And64;
459 default: vpanic("mkAND");
463 static IROp mkOR ( IRType ty ) {
464 switch (ty) {
465 case Ity_I32: return Iop_Or32;
466 case Ity_I64: return Iop_Or64;
467 default: vpanic("mkOR");
471 static IROp mkXOR ( IRType ty ) {
472 switch (ty) {
473 case Ity_I32: return Iop_Xor32;
474 case Ity_I64: return Iop_Xor64;
475 default: vpanic("mkXOR");
479 static IROp mkSHL ( IRType ty ) {
480 switch (ty) {
481 case Ity_I32: return Iop_Shl32;
482 case Ity_I64: return Iop_Shl64;
483 default: vpanic("mkSHL");
487 static IROp mkSHR ( IRType ty ) {
488 switch (ty) {
489 case Ity_I32: return Iop_Shr32;
490 case Ity_I64: return Iop_Shr64;
491 default: vpanic("mkSHR");
495 static IROp mkSAR ( IRType ty ) {
496 switch (ty) {
497 case Ity_I32: return Iop_Sar32;
498 case Ity_I64: return Iop_Sar64;
499 default: vpanic("mkSAR");
503 static IROp mkNOT ( IRType ty ) {
504 switch (ty) {
505 case Ity_I32: return Iop_Not32;
506 case Ity_I64: return Iop_Not64;
507 default: vpanic("mkNOT");
511 static IROp mkADD ( IRType ty ) {
512 switch (ty) {
513 case Ity_I32: return Iop_Add32;
514 case Ity_I64: return Iop_Add64;
515 default: vpanic("mkADD");
519 static IROp mkSUB ( IRType ty ) {
520 switch (ty) {
521 case Ity_I32: return Iop_Sub32;
522 case Ity_I64: return Iop_Sub64;
523 default: vpanic("mkSUB");
527 static IROp mkADDF ( IRType ty ) {
528 switch (ty) {
529 case Ity_F32: return Iop_AddF32;
530 case Ity_F64: return Iop_AddF64;
531 default: vpanic("mkADDF");
535 static IROp mkSUBF ( IRType ty ) {
536 switch (ty) {
537 case Ity_F32: return Iop_SubF32;
538 case Ity_F64: return Iop_SubF64;
539 default: vpanic("mkSUBF");
543 static IROp mkMULF ( IRType ty ) {
544 switch (ty) {
545 case Ity_F32: return Iop_MulF32;
546 case Ity_F64: return Iop_MulF64;
547 default: vpanic("mkMULF");
551 static IROp mkDIVF ( IRType ty ) {
552 switch (ty) {
553 case Ity_F32: return Iop_DivF32;
554 case Ity_F64: return Iop_DivF64;
555       default: vpanic("mkDIVF");
559 static IROp mkNEGF ( IRType ty ) {
560 switch (ty) {
561 case Ity_F32: return Iop_NegF32;
562 case Ity_F64: return Iop_NegF64;
563 default: vpanic("mkNEGF");
567 static IROp mkABSF ( IRType ty ) {
568 switch (ty) {
569 case Ity_F32: return Iop_AbsF32;
570 case Ity_F64: return Iop_AbsF64;
571       default: vpanic("mkABSF");
575 static IROp mkSQRTF ( IRType ty ) {
576 switch (ty) {
577 case Ity_F32: return Iop_SqrtF32;
578 case Ity_F64: return Iop_SqrtF64;
579       default: vpanic("mkSQRTF");
583 static IROp mkVecADD ( UInt size ) {
584 const IROp ops[4]
585 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
586 vassert(size < 4);
587 return ops[size];
590 static IROp mkVecQADDU ( UInt size ) {
591 const IROp ops[4]
592 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
593 vassert(size < 4);
594 return ops[size];
597 static IROp mkVecQADDS ( UInt size ) {
598 const IROp ops[4]
599 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
600 vassert(size < 4);
601 return ops[size];
604 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
605 const IROp ops[4]
606 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
607 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
608 vassert(size < 4);
609 return ops[size];
612 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
613 const IROp ops[4]
614 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
615 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
616 vassert(size < 4);
617 return ops[size];
620 static IROp mkVecSUB ( UInt size ) {
621 const IROp ops[4]
622 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
623 vassert(size < 4);
624 return ops[size];
627 static IROp mkVecQSUBU ( UInt size ) {
628 const IROp ops[4]
629 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
630 vassert(size < 4);
631 return ops[size];
634 static IROp mkVecQSUBS ( UInt size ) {
635 const IROp ops[4]
636 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
637 vassert(size < 4);
638 return ops[size];
641 static IROp mkVecSARN ( UInt size ) {
642 const IROp ops[4]
643 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
644 vassert(size < 4);
645 return ops[size];
648 static IROp mkVecSHRN ( UInt size ) {
649 const IROp ops[4]
650 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
651 vassert(size < 4);
652 return ops[size];
655 static IROp mkVecSHLN ( UInt size ) {
656 const IROp ops[4]
657 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
658 vassert(size < 4);
659 return ops[size];
662 static IROp mkVecCATEVENLANES ( UInt size ) {
663 const IROp ops[4]
664 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
665 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
666 vassert(size < 4);
667 return ops[size];
670 static IROp mkVecCATODDLANES ( UInt size ) {
671 const IROp ops[4]
672 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
673 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
674 vassert(size < 4);
675 return ops[size];
678 static IROp mkVecINTERLEAVELO ( UInt size ) {
679 const IROp ops[4]
680 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
681 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
682 vassert(size < 4);
683 return ops[size];
686 static IROp mkVecINTERLEAVEHI ( UInt size ) {
687 const IROp ops[4]
688 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
689 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
690 vassert(size < 4);
691 return ops[size];
694 static IROp mkVecMAXU ( UInt size ) {
695 const IROp ops[4]
696 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
697 vassert(size < 4);
698 return ops[size];
701 static IROp mkVecMAXS ( UInt size ) {
702 const IROp ops[4]
703 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
704 vassert(size < 4);
705 return ops[size];
708 static IROp mkVecMINU ( UInt size ) {
709 const IROp ops[4]
710 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
711 vassert(size < 4);
712 return ops[size];
715 static IROp mkVecMINS ( UInt size ) {
716 const IROp ops[4]
717 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
718 vassert(size < 4);
719 return ops[size];
722 static IROp mkVecMUL ( UInt size ) {
723 const IROp ops[4]
724 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
725 vassert(size < 3);
726 return ops[size];
729 static IROp mkVecMULLU ( UInt sizeNarrow ) {
730 const IROp ops[4]
731 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
732 vassert(sizeNarrow < 3);
733 return ops[sizeNarrow];
736 static IROp mkVecMULLS ( UInt sizeNarrow ) {
737 const IROp ops[4]
738 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
739 vassert(sizeNarrow < 3);
740 return ops[sizeNarrow];
743 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
744 const IROp ops[4]
745 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
746 vassert(sizeNarrow < 3);
747 return ops[sizeNarrow];
750 static IROp mkVecCMPEQ ( UInt size ) {
751 const IROp ops[4]
752 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
753 vassert(size < 4);
754 return ops[size];
757 static IROp mkVecCMPGTU ( UInt size ) {
758 const IROp ops[4]
759 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
760 vassert(size < 4);
761 return ops[size];
764 static IROp mkVecCMPGTS ( UInt size ) {
765 const IROp ops[4]
766 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
767 vassert(size < 4);
768 return ops[size];
771 static IROp mkVecABS ( UInt size ) {
772 const IROp ops[4]
773 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
774 vassert(size < 4);
775 return ops[size];
778 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
779 const IROp ops[4]
780 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
781 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
782 vassert(size < 4);
783 return ops[size];
786 static IRExpr* mkU ( IRType ty, ULong imm ) {
787 switch (ty) {
788 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
789 case Ity_I64: return mkU64(imm);
790 default: vpanic("mkU");
794 static IROp mkVecQDMULHIS ( UInt size ) {
795 const IROp ops[4]
796 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
797 vassert(size < 4);
798 return ops[size];
801 static IROp mkVecQRDMULHIS ( UInt size ) {
802 const IROp ops[4]
803 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
804 vassert(size < 4);
805 return ops[size];
808 static IROp mkVecQANDUQSH ( UInt size ) {
809 const IROp ops[4]
810 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
811 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
812 vassert(size < 4);
813 return ops[size];
816 static IROp mkVecQANDSQSH ( UInt size ) {
817 const IROp ops[4]
818 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
819 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
820 vassert(size < 4);
821 return ops[size];
824 static IROp mkVecQANDUQRSH ( UInt size ) {
825 const IROp ops[4]
826 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
827 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
828 vassert(size < 4);
829 return ops[size];
832 static IROp mkVecQANDSQRSH ( UInt size ) {
833 const IROp ops[4]
834 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
835 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
836 vassert(size < 4);
837 return ops[size];
840 static IROp mkVecSHU ( UInt size ) {
841 const IROp ops[4]
842 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
843 vassert(size < 4);
844 return ops[size];
847 static IROp mkVecSHS ( UInt size ) {
848 const IROp ops[4]
849 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
850 vassert(size < 4);
851 return ops[size];
854 static IROp mkVecRSHU ( UInt size ) {
855 const IROp ops[4]
856 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
857 vassert(size < 4);
858 return ops[size];
861 static IROp mkVecRSHS ( UInt size ) {
862 const IROp ops[4]
863 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
864 vassert(size < 4);
865 return ops[size];
868 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
869 const IROp ops[4]
870 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
871 Iop_NarrowUn64to32x2, Iop_INVALID };
872 vassert(sizeNarrow < 4);
873 return ops[sizeNarrow];
876 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
877 const IROp ops[4]
878 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
879 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
880 vassert(sizeNarrow < 4);
881 return ops[sizeNarrow];
884 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
885 const IROp ops[4]
886 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
887 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
888 vassert(sizeNarrow < 4);
889 return ops[sizeNarrow];
892 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
893 const IROp ops[4]
894 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
895 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
896 vassert(sizeNarrow < 4);
897 return ops[sizeNarrow];
900 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
901 const IROp ops[4]
902 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
903 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
904 vassert(sizeNarrow < 4);
905 return ops[sizeNarrow];
908 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
909 const IROp ops[4]
910 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
911 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
912 vassert(sizeNarrow < 4);
913 return ops[sizeNarrow];
916 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
917 const IROp ops[4]
918 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
919 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
920 vassert(sizeNarrow < 4);
921 return ops[sizeNarrow];
924 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
925 const IROp ops[4]
926 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
927 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
928 vassert(sizeNarrow < 4);
929 return ops[sizeNarrow];
932 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
933 const IROp ops[4]
934 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
935 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
936 vassert(sizeNarrow < 4);
937 return ops[sizeNarrow];
940 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
941 const IROp ops[4]
942 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
943 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
944 vassert(sizeNarrow < 4);
945 return ops[sizeNarrow];
948 static IROp mkVecQSHLNSATUU ( UInt size ) {
949 const IROp ops[4]
950 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
951 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
952 vassert(size < 4);
953 return ops[size];
956 static IROp mkVecQSHLNSATSS ( UInt size ) {
957 const IROp ops[4]
958 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
959 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
960 vassert(size < 4);
961 return ops[size];
964 static IROp mkVecQSHLNSATSU ( UInt size ) {
965 const IROp ops[4]
966 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
967 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
968 vassert(size < 4);
969 return ops[size];
972 static IROp mkVecADDF ( UInt size ) {
973 const IROp ops[4]
974 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
975 vassert(size < 4);
976 return ops[size];
979 static IROp mkVecMAXF ( UInt size ) {
980 const IROp ops[4]
981 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
982 vassert(size < 4);
983 return ops[size];
986 static IROp mkVecMINF ( UInt size ) {
987 const IROp ops[4]
988 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
989 vassert(size < 4);
990 return ops[size];
993 /* Generate IR to create 'arg rotated right by imm', for sane values
994 of 'ty' and 'imm'. */
995 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
997 UInt w = 0;
998 if (ty == Ity_I64) {
999 w = 64;
1000 } else {
1001 vassert(ty == Ity_I32);
1002 w = 32;
1004 vassert(w != 0);
1005 vassert(imm < w);
1006 if (imm == 0) {
1007 return arg;
1009 IRTemp res = newTemp(ty);
1010 assign(res, binop(mkOR(ty),
1011 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1012 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1013 return res;
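/* Editorial example: for ty == Ity_I32 and imm == 8, the result temp is
   bound to (arg << 24) | (arg >> 8), i.e. a right-rotation by 8 bits. */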
1016 /* Generate IR to set the returned temp to either all-zeroes or
1017 all ones, as a copy of arg<imm>. */
1018 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1020 UInt w = 0;
1021 if (ty == Ity_I64) {
1022 w = 64;
1023 } else {
1024 vassert(ty == Ity_I32);
1025 w = 32;
1027 vassert(w != 0);
1028 vassert(imm < w);
1029 IRTemp res = newTemp(ty);
1030 assign(res, binop(mkSAR(ty),
1031 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1032 mkU8(w - 1)));
1033 return res;
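/* Editorial example: for ty == Ity_I32 and imm == 22, the result is
   (arg << 9) arithmetically shifted right by 31: all-ones if arg[22] is
   set, all-zeroes otherwise. */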
1036 /* S-widen 8/16/32/64 bit int expr to 64. */
1037 static IRExpr* widenSto64 ( IRType srcTy, IRExpr* e )
1039 switch (srcTy) {
1040 case Ity_I64: return e;
1041 case Ity_I32: return unop(Iop_32Sto64, e);
1042 case Ity_I16: return unop(Iop_16Sto64, e);
1043 case Ity_I8: return unop(Iop_8Sto64, e);
1044 default: vpanic("widenSto64(arm64)");
1048 /* U-widen 8/16/32/64 bit int expr to 64. */
1049 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1051 switch (srcTy) {
1052 case Ity_I64: return e;
1053 case Ity_I32: return unop(Iop_32Uto64, e);
1054 case Ity_I16: return unop(Iop_16Uto64, e);
1055 case Ity_I8: return unop(Iop_8Uto64, e);
1056 default: vpanic("widenUto64(arm64)");
1060 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1061 of these combinations make sense. */
1062 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1064 switch (dstTy) {
1065 case Ity_I64: return e;
1066 case Ity_I32: return unop(Iop_64to32, e);
1067 case Ity_I16: return unop(Iop_64to16, e);
1068 case Ity_I8: return unop(Iop_64to8, e);
1069 default: vpanic("narrowFrom64(arm64)");
1074 /*------------------------------------------------------------*/
1075 /*--- Helpers for accessing guest registers. ---*/
1076 /*------------------------------------------------------------*/
1078 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1079 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1080 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1081 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1082 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1083 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1084 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1085 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1086 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1087 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1088 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1089 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1090 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1091 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1092 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1093 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1094 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1095 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1096 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1097 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1098 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1099 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1100 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1101 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1102 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1103 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1104 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1105 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1106 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1107 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1108 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1110 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1111 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1113 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1114 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1115 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1116 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1118 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1119 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1121 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1122 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1123 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1124 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1125 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1126 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1127 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1128 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1129 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1130 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1131 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1132 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1133 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1134 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1135 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1136 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1137 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1138 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1139 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1140 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1141 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1142 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1143 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1144 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1145 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1146 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1147 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1148 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1149 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1150 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1151 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1152 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1154 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1155 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1157 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1158 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1160 #define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
1161 #define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
1162 #define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
1165 /* ---------------- Integer registers ---------------- */
1167 static Int offsetIReg64 ( UInt iregNo )
1169 /* Do we care about endianness here? We do if sub-parts of integer
1170 registers are accessed. */
1171 switch (iregNo) {
1172 case 0: return OFFB_X0;
1173 case 1: return OFFB_X1;
1174 case 2: return OFFB_X2;
1175 case 3: return OFFB_X3;
1176 case 4: return OFFB_X4;
1177 case 5: return OFFB_X5;
1178 case 6: return OFFB_X6;
1179 case 7: return OFFB_X7;
1180 case 8: return OFFB_X8;
1181 case 9: return OFFB_X9;
1182 case 10: return OFFB_X10;
1183 case 11: return OFFB_X11;
1184 case 12: return OFFB_X12;
1185 case 13: return OFFB_X13;
1186 case 14: return OFFB_X14;
1187 case 15: return OFFB_X15;
1188 case 16: return OFFB_X16;
1189 case 17: return OFFB_X17;
1190 case 18: return OFFB_X18;
1191 case 19: return OFFB_X19;
1192 case 20: return OFFB_X20;
1193 case 21: return OFFB_X21;
1194 case 22: return OFFB_X22;
1195 case 23: return OFFB_X23;
1196 case 24: return OFFB_X24;
1197 case 25: return OFFB_X25;
1198 case 26: return OFFB_X26;
1199 case 27: return OFFB_X27;
1200 case 28: return OFFB_X28;
1201 case 29: return OFFB_X29;
1202 case 30: return OFFB_X30;
1203 /* but not 31 */
1204 default: vassert(0);
1208 static Int offsetIReg64orSP ( UInt iregNo )
1210 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1213 static const HChar* nameIReg64orZR ( UInt iregNo )
1215 vassert(iregNo < 32);
1216 static const HChar* names[32]
1217 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1218 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1219 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1220 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1221 return names[iregNo];
1224 static const HChar* nameIReg64orSP ( UInt iregNo )
1226 if (iregNo == 31) {
1227 return "sp";
1229 vassert(iregNo < 31);
1230 return nameIReg64orZR(iregNo);
1233 static IRExpr* getIReg64orSP ( UInt iregNo )
1235 vassert(iregNo < 32);
1236 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1239 static IRExpr* getIReg64orZR ( UInt iregNo )
1241 if (iregNo == 31) {
1242 return mkU64(0);
1244 vassert(iregNo < 31);
1245 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1248 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1250 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1251 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1254 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1256 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1257 if (iregNo == 31) {
1258 return;
1260 vassert(iregNo < 31);
1261 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1264 static const HChar* nameIReg32orZR ( UInt iregNo )
1266 vassert(iregNo < 32);
1267 static const HChar* names[32]
1268 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1269 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1270 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1271 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1272 return names[iregNo];
1275 static const HChar* nameIReg32orSP ( UInt iregNo )
1277 if (iregNo == 31) {
1278 return "wsp";
1280 vassert(iregNo < 31);
1281 return nameIReg32orZR(iregNo);
1284 static IRExpr* getIReg32orSP ( UInt iregNo )
1286 vassert(iregNo < 32);
1287 return unop(Iop_64to32,
1288 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1291 static IRExpr* getIReg32orZR ( UInt iregNo )
1293 if (iregNo == 31) {
1294 return mkU32(0);
1296 vassert(iregNo < 31);
1297 return unop(Iop_64to32,
1298 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1301 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1303 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1304 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1307 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1309 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1310 if (iregNo == 31) {
1311 return;
1313 vassert(iregNo < 31);
1314 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1317 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1319 vassert(is64 == True || is64 == False);
1320 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1323 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1325 vassert(is64 == True || is64 == False);
1326 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1329 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1331 vassert(is64 == True || is64 == False);
1332 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1335 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1337 vassert(is64 == True || is64 == False);
1338 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1341 static void putPC ( IRExpr* e )
1343 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1344 stmt( IRStmt_Put(OFFB_PC, e) );
1348 /* ---------------- Vector (Q) registers ---------------- */
1350 static Int offsetQReg128 ( UInt qregNo )
1352 /* We don't care about endianness at this point. It only becomes
1353 relevant when dealing with sections of these registers.*/
1354 switch (qregNo) {
1355 case 0: return OFFB_Q0;
1356 case 1: return OFFB_Q1;
1357 case 2: return OFFB_Q2;
1358 case 3: return OFFB_Q3;
1359 case 4: return OFFB_Q4;
1360 case 5: return OFFB_Q5;
1361 case 6: return OFFB_Q6;
1362 case 7: return OFFB_Q7;
1363 case 8: return OFFB_Q8;
1364 case 9: return OFFB_Q9;
1365 case 10: return OFFB_Q10;
1366 case 11: return OFFB_Q11;
1367 case 12: return OFFB_Q12;
1368 case 13: return OFFB_Q13;
1369 case 14: return OFFB_Q14;
1370 case 15: return OFFB_Q15;
1371 case 16: return OFFB_Q16;
1372 case 17: return OFFB_Q17;
1373 case 18: return OFFB_Q18;
1374 case 19: return OFFB_Q19;
1375 case 20: return OFFB_Q20;
1376 case 21: return OFFB_Q21;
1377 case 22: return OFFB_Q22;
1378 case 23: return OFFB_Q23;
1379 case 24: return OFFB_Q24;
1380 case 25: return OFFB_Q25;
1381 case 26: return OFFB_Q26;
1382 case 27: return OFFB_Q27;
1383 case 28: return OFFB_Q28;
1384 case 29: return OFFB_Q29;
1385 case 30: return OFFB_Q30;
1386 case 31: return OFFB_Q31;
1387 default: vassert(0);
1391 /* Write to a complete Qreg. */
1392 static void putQReg128 ( UInt qregNo, IRExpr* e )
1394 vassert(qregNo < 32);
1395 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1396 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1399 /* Read a complete Qreg. */
1400 static IRExpr* getQReg128 ( UInt qregNo )
1402 vassert(qregNo < 32);
1403 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1406 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1407 bit sub-parts we can choose either integer or float types, and
1408 choose float on the basis that that is the common use case and so
1409 will give least interference with Put-to-Get forwarding later
1410 on. */
1411 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1413 switch (szB) {
1414 case 1: return Ity_I8;
1415 case 2: return Ity_I16;
1416 case 4: return Ity_I32; //Ity_F32;
1417 case 8: return Ity_F64;
1418 case 16: return Ity_V128;
1419 default: vassert(0);
1423 /* Find the offset of the laneNo'th lane of type laneTy in the given
1424 Qreg. Since the host is little-endian, the least significant lane
1425 has the lowest offset. */
1426 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1428 vassert(host_endness == VexEndnessLE);
1429 Int base = offsetQReg128(qregNo);
1430 /* Since the host is little-endian, the least significant lane
1431 will be at the lowest address. */
1432 /* Restrict this to known types, so as to avoid silently accepting
1433 stupid types. */
1434 UInt laneSzB = 0;
1435 switch (laneTy) {
1436 case Ity_I8: laneSzB = 1; break;
1437 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1438 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1439 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1440 case Ity_V128: laneSzB = 16; break;
1441 default: break;
1443 vassert(laneSzB > 0);
1444 UInt minOff = laneNo * laneSzB;
1445 UInt maxOff = minOff + laneSzB - 1;
1446 vassert(maxOff < 16);
1447 return base + minOff;
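/* Editorial example: offsetQRegLane(5, Ity_I32, 2) is offsetQReg128(5) + 8,
   i.e. the third 32-bit lane of Q5, since lanes are laid out from the low
   offset upwards on a little-endian host. */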
1450 /* Put to the least significant lane of a Qreg. */
1451 static void putQRegLO ( UInt qregNo, IRExpr* e )
1453 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1454 Int off = offsetQRegLane(qregNo, ty, 0);
1455 switch (ty) {
1456 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1457 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1458 break;
1459 default:
1460 vassert(0); // Other cases are probably invalid
1462 stmt(IRStmt_Put(off, e));
1465 /* Get from the least significant lane of a Qreg. */
1466 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1468 Int off = offsetQRegLane(qregNo, ty, 0);
1469 switch (ty) {
1470 case Ity_I8:
1471 case Ity_F16: case Ity_I16:
1472 case Ity_I32: case Ity_I64:
1473 case Ity_F32: case Ity_F64: case Ity_V128:
1474 break;
1475 default:
1476 vassert(0); // Other cases are ATC
1478 return IRExpr_Get(off, ty);
1481 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1483 static const HChar* namesQ[32]
1484 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1485 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1486 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1487 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1488 static const HChar* namesD[32]
1489 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1490 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1491 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1492 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1493 static const HChar* namesS[32]
1494 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1495 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1496 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1497 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1498 static const HChar* namesH[32]
1499 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1500 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1501 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1502 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1503 static const HChar* namesB[32]
1504 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1505 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1506 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1507 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1508 vassert(qregNo < 32);
1509 switch (sizeofIRType(laneTy)) {
1510 case 1: return namesB[qregNo];
1511 case 2: return namesH[qregNo];
1512 case 4: return namesS[qregNo];
1513 case 8: return namesD[qregNo];
1514 case 16: return namesQ[qregNo];
1515 default: vassert(0);
1517 /*NOTREACHED*/
1520 static const HChar* nameQReg128 ( UInt qregNo )
1522 return nameQRegLO(qregNo, Ity_V128);
1525 /* Find the offset of the most significant half (8 bytes) of the given
1526 Qreg. This requires knowing the endianness of the host. */
1527 static Int offsetQRegHI64 ( UInt qregNo )
1529 return offsetQRegLane(qregNo, Ity_I64, 1);
1532 static IRExpr* getQRegHI64 ( UInt qregNo )
1534 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1537 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1539 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1540 Int off = offsetQRegHI64(qregNo);
1541 switch (ty) {
1542 case Ity_I64: case Ity_F64:
1543 break;
1544 default:
1545 vassert(0); // Other cases are plain wrong
1547 stmt(IRStmt_Put(off, e));
1550 /* Put to a specified lane of a Qreg. */
1551 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1553 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1554 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1555 switch (laneTy) {
1556 case Ity_F64: case Ity_I64:
1557 case Ity_I32: case Ity_F32:
1558 case Ity_I16: case Ity_F16:
1559 case Ity_I8:
1560 break;
1561 default:
1562 vassert(0); // Other cases are ATC
1564 stmt(IRStmt_Put(off, e));
1567 /* Get from a specified lane of a Qreg. */
1568 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1570 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1571 switch (laneTy) {
1572 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1573 case Ity_F64: case Ity_F32: case Ity_F16:
1574 break;
1575 default:
1576 vassert(0); // Other cases are ATC
1578 return IRExpr_Get(off, laneTy);
1582 //ZZ /* ---------------- Misc registers ---------------- */
1583 //ZZ
1584 //ZZ static void putMiscReg32 ( UInt gsoffset,
1585 //ZZ IRExpr* e, /* :: Ity_I32 */
1586 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1587 //ZZ {
1588 //ZZ switch (gsoffset) {
1589 //ZZ case OFFB_FPSCR: break;
1590 //ZZ case OFFB_QFLAG32: break;
1591 //ZZ case OFFB_GEFLAG0: break;
1592 //ZZ case OFFB_GEFLAG1: break;
1593 //ZZ case OFFB_GEFLAG2: break;
1594 //ZZ case OFFB_GEFLAG3: break;
1595 //ZZ default: vassert(0); /* awaiting more cases */
1596 //ZZ }
1597 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1598 //ZZ
1599 //ZZ if (guardT == IRTemp_INVALID) {
1600 //ZZ /* unconditional write */
1601 //ZZ stmt(IRStmt_Put(gsoffset, e));
1602 //ZZ } else {
1603 //ZZ stmt(IRStmt_Put(
1604 //ZZ gsoffset,
1605 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1606 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1607 //ZZ ));
1608 //ZZ }
1609 //ZZ }
1610 //ZZ
1611 //ZZ static IRTemp get_ITSTATE ( void )
1612 //ZZ {
1613 //ZZ ASSERT_IS_THUMB;
1614 //ZZ IRTemp t = newTemp(Ity_I32);
1615 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1616 //ZZ return t;
1617 //ZZ }
1618 //ZZ
1619 //ZZ static void put_ITSTATE ( IRTemp t )
1620 //ZZ {
1621 //ZZ ASSERT_IS_THUMB;
1622 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1623 //ZZ }
1624 //ZZ
1625 //ZZ static IRTemp get_QFLAG32 ( void )
1626 //ZZ {
1627 //ZZ IRTemp t = newTemp(Ity_I32);
1628 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1629 //ZZ return t;
1630 //ZZ }
1631 //ZZ
1632 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1633 //ZZ {
1634 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1635 //ZZ }
1636 //ZZ
1637 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1638 //ZZ Status Register) to indicate that overflow or saturation occurred.
1639 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1640 //ZZ value to indicate saturation. */
1641 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1642 //ZZ {
1643 //ZZ IRTemp old = get_QFLAG32();
1644 //ZZ IRTemp nyu = newTemp(Ity_I32);
1645 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1646 //ZZ put_QFLAG32(nyu, condT);
1647 //ZZ }
1650 /* ---------------- FPCR stuff ---------------- */
1652 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1653 convert them to IR format. Bind the final result to the
1654 returned temp. */
1655 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1657 /* The ARMvfp encoding for rounding mode bits is:
1658 00 to nearest
1659 01 to +infinity
1660 10 to -infinity
1661 11 to zero
1662 We need to convert that to the IR encoding:
1663 00 to nearest (the default)
1664 10 to +infinity
1665 01 to -infinity
1666 11 to zero
1667 Which can be done by swapping bits 0 and 1.
1668       The rmode bits are at 23:22 in FPCR.
1670 IRTemp armEncd = newTemp(Ity_I32);
1671 IRTemp swapped = newTemp(Ity_I32);
1672 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1673 we don't zero out bits 24 and above, since the assignment to
1674 'swapped' will mask them out anyway. */
1675 assign(armEncd,
1676 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1677 /* Now swap them. */
1678 assign(swapped,
1679 binop(Iop_Or32,
1680 binop(Iop_And32,
1681 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1682 mkU32(2)),
1683 binop(Iop_And32,
1684 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1685 mkU32(1))
1687 return swapped;
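/* Editorial example, following the table above: if FPCR[23:22] == 01
   (ARM "to +infinity"), then armEncd's low two bits are 01 and 'swapped'
   becomes 10, which is the IR encoding for rounding to +infinity. */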
1691 /*------------------------------------------------------------*/
1692 /*--- Helpers for flag handling and conditional insns ---*/
1693 /*------------------------------------------------------------*/
1695 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1697 switch (cond) {
1698 case ARM64CondEQ: return "eq";
1699 case ARM64CondNE: return "ne";
1700 case ARM64CondCS: return "cs"; // or 'hs'
1701 case ARM64CondCC: return "cc"; // or 'lo'
1702 case ARM64CondMI: return "mi";
1703 case ARM64CondPL: return "pl";
1704 case ARM64CondVS: return "vs";
1705 case ARM64CondVC: return "vc";
1706 case ARM64CondHI: return "hi";
1707 case ARM64CondLS: return "ls";
1708 case ARM64CondGE: return "ge";
1709 case ARM64CondLT: return "lt";
1710 case ARM64CondGT: return "gt";
1711 case ARM64CondLE: return "le";
1712 case ARM64CondAL: return "al";
1713 case ARM64CondNV: return "nv";
1714 default: vpanic("name_ARM64Condcode");
1718 /* and a handy shorthand for it */
1719 static const HChar* nameCC ( ARM64Condcode cond ) {
1720 return nameARM64Condcode(cond);
1724 /* Build IR to calculate some particular condition from stored
1725 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1726 Ity_I64, suitable for narrowing. Although the return type is
1727 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1728 :: Ity_I64 and must denote the condition to compute in
1729 bits 7:4, and be zero everywhere else.
1731 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1733 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1734 /* And 'cond' had better produce a value in which only bits 7:4 are
1735 nonzero. However, obviously we can't assert for that. */
1737 /* So what we're constructing for the first argument is
1738 "(cond << 4) | stored-operation".
1739 However, as per comments above, 'cond' must be supplied
1740 pre-shifted to this function.
1742 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1743 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1744 8 bits of the first argument. */
1745 IRExpr** args
1746 = mkIRExprVec_4(
1747 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1748 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1749 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1750 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1752 IRExpr* call
1753 = mkIRExprCCall(
1754 Ity_I64,
1755 0/*regparm*/,
1756 "arm64g_calculate_condition", &arm64g_calculate_condition,
1757 args
1760 /* Exclude the requested condition, OP and NDEP from definedness
1761 checking. We're only interested in DEP1 and DEP2. */
1762 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1763 return call;
1767 /* Build IR to calculate some particular condition from stored
1768 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1769 Ity_I64, suitable for narrowing. Although the return type is
1770 Ity_I64, the returned value is either 0 or 1.
1772 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1774    /* First arg is "(cond << 4) | stored cc_op".  This requires that the
1775 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1776 (COND, OP) pair in the lowest 8 bits of the first argument. */
1777 vassert(cond >= 0 && cond <= 15);
1778 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
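/* Editorial example: mk_arm64g_calculate_condition(ARM64CondNE) passes
   mkU64(ARM64CondNE << 4) to the _dyn variant above, so the helper call
   receives (NE, current CC_OP) packed into the low 8 bits of its first
   argument and returns 0 or 1 accordingly. */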
1782 /* Build IR to calculate just the carry flag from stored
1783 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1784 Ity_I64. */
1785 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1787 IRExpr** args
1788 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1789 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1790 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1791 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1792 IRExpr* call
1793 = mkIRExprCCall(
1794 Ity_I64,
1795 0/*regparm*/,
1796 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1797 args
1799 /* Exclude OP and NDEP from definedness checking. We're only
1800 interested in DEP1 and DEP2. */
1801 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1802 return call;
1806 //ZZ /* Build IR to calculate just the overflow flag from stored
1807 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1808 //ZZ Ity_I32. */
1809 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1810 //ZZ {
1811 //ZZ IRExpr** args
1812 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1813 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1814 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1815 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1816 //ZZ IRExpr* call
1817 //ZZ = mkIRExprCCall(
1818 //ZZ Ity_I32,
1819 //ZZ 0/*regparm*/,
1820 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1821 //ZZ args
1822 //ZZ );
1823 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1824 //ZZ interested in DEP1 and DEP2. */
1825 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1826 //ZZ return call;
1827 //ZZ }
1830 /* Build IR to calculate N Z C V in bits 31:28 of the
1831 returned word. */
1832 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1834 IRExpr** args
1835 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1836 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1837 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1838 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1839 IRExpr* call
1840 = mkIRExprCCall(
1841 Ity_I64,
1842 0/*regparm*/,
1843 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1844 args
1846 /* Exclude OP and NDEP from definedness checking. We're only
1847 interested in DEP1 and DEP2. */
1848 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1849 return call;
1853 /* Build IR to set the flags thunk, in the most general case. */
1854 static
1855 void setFlags_D1_D2_ND ( UInt cc_op,
1856 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1858    vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1859    vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1860    vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1861 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1862 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1863 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1864 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1865 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1868 /* Build IR to set the flags thunk after ADD or SUB. */
1869 static
1870 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1872 IRTemp argL64 = IRTemp_INVALID;
1873 IRTemp argR64 = IRTemp_INVALID;
1874 IRTemp z64 = newTemp(Ity_I64);
1875 if (is64) {
1876 argL64 = argL;
1877 argR64 = argR;
1878 } else {
1879 argL64 = newTemp(Ity_I64);
1880 argR64 = newTemp(Ity_I64);
1881 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1882 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1884 assign(z64, mkU64(0));
1885 UInt cc_op = ARM64G_CC_OP_NUMBER;
1886 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1887 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1888 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1889 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1890 else { vassert(0); }
1891 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
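/* Illustrative (hypothetical) caller, not part of the original file: a
   decoder for a 32-bit SUBS, having bound the operands to Ity_I32 temps
   argL and argR, would set the thunk with
      setFlags_ADD_SUB(False, True, argL, argR);   (is64=False, isSUB=True)
   which zero-extends both operands to 64 bits and selects
   ARM64G_CC_OP_SUB32. */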
1894 /* Build IR to set the flags thunk after ADC or SBC. */
1895 static
1896 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1897                         IRTemp argL, IRTemp argR, IRTemp oldC )
1898 {
1899 IRTemp argL64 = IRTemp_INVALID;
1900 IRTemp argR64 = IRTemp_INVALID;
1901 IRTemp oldC64 = IRTemp_INVALID;
1902 if (is64) {
1903 argL64 = argL;
1904 argR64 = argR;
1905 oldC64 = oldC;
1906 } else {
1907 argL64 = newTemp(Ity_I64);
1908 argR64 = newTemp(Ity_I64);
1909 oldC64 = newTemp(Ity_I64);
1910 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1911 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1912       assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1913    }
1914 UInt cc_op = ARM64G_CC_OP_NUMBER;
1915 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1916 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1917 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1918 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1919 else { vassert(0); }
1920    setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1921 }
1923 /* Build IR to set the flags thunk after ADD or SUB, if the given
1924 condition evaluates to True at run time. If not, the flags are set
1925 to the specified NZCV value. */
1926 static
1927 void setFlags_ADD_SUB_conditionally (
1928 Bool is64, Bool isSUB,
1929                IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1930             )
1931 {
1932 /* Generate IR as follows:
1933 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1934 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1935 CC_DEP2 = ITE(cond, argR64, 0)
1936         CC_NDEP = 0
1937    */
1939 IRTemp z64 = newTemp(Ity_I64);
1940 assign(z64, mkU64(0));
1942 /* Establish the operation and operands for the True case. */
1943 IRTemp t_dep1 = IRTemp_INVALID;
1944 IRTemp t_dep2 = IRTemp_INVALID;
1945 UInt t_op = ARM64G_CC_OP_NUMBER;
1946 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1947 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1948 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1949 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1950 else { vassert(0); }
1951 /* */
1952 if (is64) {
1953 t_dep1 = argL;
1954 t_dep2 = argR;
1955 } else {
1956 t_dep1 = newTemp(Ity_I64);
1957 t_dep2 = newTemp(Ity_I64);
1958 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1959       assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1960    }
1962 /* Establish the operation and operands for the False case. */
1963 IRTemp f_dep1 = newTemp(Ity_I64);
1964 IRTemp f_dep2 = z64;
1965 UInt f_op = ARM64G_CC_OP_COPY;
1966 assign(f_dep1, mkU64(nzcv << 28));
1968 /* Final thunk values */
1969 IRTemp dep1 = newTemp(Ity_I64);
1970 IRTemp dep2 = newTemp(Ity_I64);
1971 IRTemp op = newTemp(Ity_I64);
1973 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1974 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1975 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1977 /* finally .. */
1978 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1979 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1980 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1981    stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1982 }
1984 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1985 static
1986 void setFlags_LOGIC ( Bool is64, IRTemp res )
1987 {
1988 IRTemp res64 = IRTemp_INVALID;
1989 IRTemp z64 = newTemp(Ity_I64);
1990 UInt cc_op = ARM64G_CC_OP_NUMBER;
1991 if (is64) {
1992 res64 = res;
1993 cc_op = ARM64G_CC_OP_LOGIC64;
1994 } else {
1995 res64 = newTemp(Ity_I64);
1996 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1997       cc_op = ARM64G_CC_OP_LOGIC32;
1998    }
1999 assign(z64, mkU64(0));
2000    setFlags_D1_D2_ND(cc_op, res64, z64, z64);
2001 }
2003 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
2004 located in bits 31:28 of the supplied value. */
2005 static
2006 void setFlags_COPY ( IRTemp nzcv_28x0 )
2007 {
2008 IRTemp z64 = newTemp(Ity_I64);
2009 assign(z64, mkU64(0));
2010    setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
2011 }
2014 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2015 //ZZ sets it at all) */
2016 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2017 //ZZ IRTemp t_dep2,
2018 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2019 //ZZ {
2020 //ZZ IRTemp z32 = newTemp(Ity_I32);
2021 //ZZ assign( z32, mkU32(0) );
2022 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2023 //ZZ }
2024 //ZZ
2025 //ZZ
2026 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2027 //ZZ sets it at all) */
2028 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2029 //ZZ IRTemp t_ndep,
2030 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2031 //ZZ {
2032 //ZZ IRTemp z32 = newTemp(Ity_I32);
2033 //ZZ assign( z32, mkU32(0) );
2034 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2035 //ZZ }
2036 //ZZ
2037 //ZZ
2038 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2039 //ZZ sets them at all) */
2040 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2041 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2042 //ZZ {
2043 //ZZ IRTemp z32 = newTemp(Ity_I32);
2044 //ZZ assign( z32, mkU32(0) );
2045 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2046 //ZZ }
2049 /*------------------------------------------------------------*/
2050 /*--- Misc math helpers ---*/
2051 /*------------------------------------------------------------*/
2053 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2054 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2055 {
2056 IRTemp maskT = newTemp(Ity_I64);
2057 IRTemp res = newTemp(Ity_I64);
2058 vassert(sh >= 1 && sh <= 63);
2059 assign(maskT, mkU64(mask));
2060 assign( res,
2061 binop(Iop_Or64,
2062 binop(Iop_Shr64,
2063 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2064 mkU8(sh)),
2065 binop(Iop_And64,
2066 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2067                        mkexpr(maskT))
2068                )
2069          );
2070    return res;
2071 }
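/* For example, with x holding bytes b7 b6 b5 b4 b3 b2 b1 b0 (b7 most
   significant), math_SWAPHELPER(x, 0xFF00FF00FF00FF00ULL, 8) produces
   b6 b7 b4 b5 b2 b3 b0 b1, that is, each adjacent byte pair swapped.
   The swap generators below are built by composing such steps. */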
2073 /* Generates byte swaps within 32-bit lanes. */
2074 static IRTemp math_UINTSWAP64 ( IRTemp src )
2076 IRTemp res;
2077 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2078 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2079 return res;
2082 /* Generates byte swaps within 16-bit lanes. */
2083 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2085 IRTemp res;
2086 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2087 return res;
2090 /* Generates a 64-bit byte swap. */
2091 static IRTemp math_BYTESWAP64 ( IRTemp src )
2093 IRTemp res;
2094 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2095 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2096 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2097 return res;
2100 /* Generates a 64-bit bit swap. */
2101 static IRTemp math_BITSWAP64 ( IRTemp src )
2103 IRTemp res;
2104 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2105 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2106 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2107 return math_BYTESWAP64(res);
2110 /* Duplicates the bits at the bottom of the given word to fill the
2111 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2112 except for the bottom bits. */
2113 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2115 if (srcTy == Ity_I8) {
2116 IRTemp t16 = newTemp(Ity_I64);
2117 assign(t16, binop(Iop_Or64, mkexpr(src),
2118 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2119 IRTemp t32 = newTemp(Ity_I64);
2120 assign(t32, binop(Iop_Or64, mkexpr(t16),
2121 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2122 IRTemp t64 = newTemp(Ity_I64);
2123 assign(t64, binop(Iop_Or64, mkexpr(t32),
2124 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2125 return t64;
2127 if (srcTy == Ity_I16) {
2128 IRTemp t32 = newTemp(Ity_I64);
2129 assign(t32, binop(Iop_Or64, mkexpr(src),
2130 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2131 IRTemp t64 = newTemp(Ity_I64);
2132 assign(t64, binop(Iop_Or64, mkexpr(t32),
2133 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2134 return t64;
2136 if (srcTy == Ity_I32) {
2137 IRTemp t64 = newTemp(Ity_I64);
2138 assign(t64, binop(Iop_Or64, mkexpr(src),
2139 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2140 return t64;
2142 if (srcTy == Ity_I64) {
2143       return src;
2144    }
2145    vassert(0);
2146 }
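/* For example, with srcTy == Ity_I8 and src == 0x00000000000000AB, the
   Or/Shl chain above gives t16 = 0x000000000000ABAB,
   t32 = 0x00000000ABABABAB and t64 = 0xABABABABABABABAB, as required,
   provided the bits above the bottom lane really are zero. */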
2149 /* Duplicates the src element exactly so as to fill a V128 value. */
2150 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2152 IRTemp res = newTempV128();
2153 if (srcTy == Ity_F64) {
2154 IRTemp i64 = newTemp(Ity_I64);
2155 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2156 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2157 return res;
2159 if (srcTy == Ity_F32) {
2160 IRTemp i64a = newTemp(Ity_I64);
2161 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2162 IRTemp i64b = newTemp(Ity_I64);
2163 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2164 mkexpr(i64a)));
2165 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2166 return res;
2168 if (srcTy == Ity_I64) {
2169 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2170 return res;
2172 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2173 IRTemp t1 = newTemp(Ity_I64);
2174 assign(t1, widenUto64(srcTy, mkexpr(src)));
2175 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2176 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2177 return res;
2179 vassert(0);
2183 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2184 zero out the upper half. */
2185 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2187 if (bitQ == 1) return mkexpr(fullWidth);
2188 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2189 vassert(0);
2192 /* The same, but from an expression instead. */
2193 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2195 IRTemp fullWidthT = newTempV128();
2196 assign(fullWidthT, fullWidth);
2197 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2201 /*------------------------------------------------------------*/
2202 /*--- FP comparison helpers ---*/
2203 /*------------------------------------------------------------*/
2205 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2206 as an IRCmpF64Result. Generate code to convert it to an
2207 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2208 Assign a new temp to hold that value, and return the temp. */
2209 static
2210 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2212 IRTemp ix = newTemp(Ity_I64);
2213 IRTemp termL = newTemp(Ity_I64);
2214 IRTemp termR = newTemp(Ity_I64);
2215 IRTemp nzcv = newTemp(Ity_I64);
2216 IRTemp irRes = newTemp(Ity_I64);
2218 /* This is where the fun starts. We have to convert 'irRes' from
2219 an IR-convention return result (IRCmpF64Result) to an
2220 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2221 4 bits of 'nzcv'. */
2222    /* Map compare result from IR to ARM(nzcv) */
2223    /*
2224       FP cmp result | IR   | ARM(nzcv)
2225       --------------------------------
2226          UN           0x45   0011
2227          LT           0x01   1000
2228          GT           0x00   0010
2229          EQ           0x40   0110
2230    */
2231 /* Now since you're probably wondering WTF ..
2233 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2234 places them side by side, giving a number which is 0, 1, 2 or 3.
2236 termL is a sequence cooked up by GNU superopt. It converts ix
2237 into an almost correct value NZCV value (incredibly), except
2238 for the case of UN, where it produces 0100 instead of the
2239 required 0011.
2241 termR is therefore a correction term, also computed from ix. It
2242      is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
2243 the final correct value, we subtract termR from termL.
2245 Don't take my word for it. There's a test program at the bottom
2246      of guest_arm_toIR.c, to try this out with.
2247    */
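   /* Worked example, following the expressions below (ix, termL and
      the final value shown in binary):

         FP cmp result | irRes | ix | termL | termR | termL - termR
         -----------------------------------------------------------
             UN           0x45    3   0100     1       0011
             LT           0x01    1   1000     0       1000
             GT           0x00    0   0010     0       0010
             EQ           0x40    2   0110     0       0110
   */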
2248 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2250    assign(
2251       ix,
2252 binop(Iop_Or64,
2253 binop(Iop_And64,
2254 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2255 mkU64(3)),
2256 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2258 assign(
2259 termL,
2260 binop(Iop_Add64,
2261 binop(Iop_Shr64,
2262 binop(Iop_Sub64,
2263 binop(Iop_Shl64,
2264 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2265 mkU8(62)),
2266 mkU64(1)),
2267 mkU8(61)),
2268 mkU64(1)));
2270 assign(
2271 termR,
2272 binop(Iop_And64,
2273 binop(Iop_And64,
2274 mkexpr(ix),
2275 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2276 mkU64(1)));
2278 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2279 return nzcv;
2283 /*------------------------------------------------------------*/
2284 /*--- Data processing (immediate) ---*/
2285 /*------------------------------------------------------------*/
2287 /* Helper functions for supporting "DecodeBitMasks" */
2289 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2291 vassert(width > 0 && width <= 64);
2292 vassert(rot >= 0 && rot < width);
2293 if (rot == 0) return x;
2294 ULong res = x >> rot;
2295 res |= (x << (width - rot));
2296 if (width < 64)
2297 res &= ((1ULL << width) - 1);
2298 return res;
2301 static ULong dbm_RepTo64( Int esize, ULong x )
2303 switch (esize) {
2304 case 64:
2305 return x;
2306 case 32:
2307 x &= 0xFFFFFFFF; x |= (x << 32);
2308 return x;
2309 case 16:
2310 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2311 return x;
2312 case 8:
2313 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2314 return x;
2315 case 4:
2316 x &= 0xF; x |= (x << 4); x |= (x << 8);
2317 x |= (x << 16); x |= (x << 32);
2318 return x;
2319 case 2:
2320 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2321 x |= (x << 16); x |= (x << 32);
2322 return x;
2323 default:
2324 break;
2326 vpanic("dbm_RepTo64");
2327 /*NOTREACHED*/
2328 return 0;
2331 static Int dbm_highestSetBit ( ULong x )
2333 Int i;
2334 for (i = 63; i >= 0; i--) {
2335 if (x & (1ULL << i))
2336 return i;
2338 vassert(x == 0);
2339 return -1;
2342 static
2343 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2344 ULong immN, ULong imms, ULong immr, Bool immediate,
2345 UInt M /*32 or 64*/)
2347 vassert(immN < (1ULL << 1));
2348 vassert(imms < (1ULL << 6));
2349 vassert(immr < (1ULL << 6));
2350 vassert(immediate == False || immediate == True);
2351 vassert(M == 32 || M == 64);
2353 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2354 if (len < 1) { /* printf("fail1\n"); */ return False; }
2355 vassert(len <= 6);
2356 vassert(M >= (1 << len));
2358 vassert(len >= 1 && len <= 6);
2359 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2360 (1 << len) - 1;
2361 vassert(levels >= 1 && levels <= 63);
2363 if (immediate && ((imms & levels) == levels)) {
2364 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2365 return False;
2368 ULong S = imms & levels;
2369 ULong R = immr & levels;
2370 Int diff = S - R;
2371 diff &= 63;
2372 Int esize = 1 << len;
2373 vassert(2 <= esize && esize <= 64);
2375 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2376 same below with d. S can be 63 in which case we have an out of
2377 range and hence undefined shift. */
2378 vassert(S >= 0 && S <= 63);
2379 vassert(esize >= (S+1));
2380 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2381 //(1ULL << (S+1)) - 1;
2382 ((1ULL << S) - 1) + (1ULL << S);
2384 Int d = // diff<len-1:0>
2385 diff & ((1 << len)-1);
2386 vassert(esize >= (d+1));
2387 vassert(d >= 0 && d <= 63);
2389 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2390 //(1ULL << (d+1)) - 1;
2391 ((1ULL << d) - 1) + (1ULL << d);
2393 if (esize != 64) vassert(elem_s < (1ULL << esize));
2394 if (esize != 64) vassert(elem_d < (1ULL << esize));
2396 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2397 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2399    return True;
2400 }
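/* Two worked examples, checked against the code above:

   * N=1, immr=0, imms=0b000111, M=64 (the encoding of the 64-bit
     logical immediate 0xFF): len = 6, esize = 64, levels = 63,
     S = 7, R = 0, d = 7, elem_s = elem_d = 0xFF, hence
     wmask = tmask = 0x00000000000000FF.

   * N=0, immr=0, imms=0b111100, M=64 (the encoding of
     0x5555555555555555): len = 1, esize = 2, levels = 1,
     S = R = d = 0, elem_s = elem_d = 1, and replication at esize 2
     gives wmask = tmask = 0x5555555555555555.
*/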
2403 static
2404 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2405 UInt insn)
2407 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2409 /* insn[28:23]
2410 10000x PC-rel addressing
2411 10001x Add/subtract (immediate)
2412 100100 Logical (immediate)
2413 100101 Move Wide (immediate)
2414 100110 Bitfield
2415 100111 Extract
2418 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2419 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2420 Bool is64 = INSN(31,31) == 1;
2421 Bool isSub = INSN(30,30) == 1;
2422 Bool setCC = INSN(29,29) == 1;
2423 UInt sh = INSN(23,22);
2424 UInt uimm12 = INSN(21,10);
2425 UInt nn = INSN(9,5);
2426 UInt dd = INSN(4,0);
2427 const HChar* nm = isSub ? "sub" : "add";
2428 if (sh >= 2) {
2429 /* Invalid; fall through */
2430 } else {
2431 vassert(sh <= 1);
2432 uimm12 <<= (12 * sh);
2433 if (is64) {
2434 IRTemp argL = newTemp(Ity_I64);
2435 IRTemp argR = newTemp(Ity_I64);
2436 IRTemp res = newTemp(Ity_I64);
2437 assign(argL, getIReg64orSP(nn));
2438 assign(argR, mkU64(uimm12));
2439 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2440 mkexpr(argL), mkexpr(argR)));
2441 if (setCC) {
2442 putIReg64orZR(dd, mkexpr(res));
2443 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2444 DIP("%ss %s, %s, 0x%x\n",
2445 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2446 } else {
2447 putIReg64orSP(dd, mkexpr(res));
2448 DIP("%s %s, %s, 0x%x\n",
2449 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2451 } else {
2452 IRTemp argL = newTemp(Ity_I32);
2453 IRTemp argR = newTemp(Ity_I32);
2454 IRTemp res = newTemp(Ity_I32);
2455 assign(argL, getIReg32orSP(nn));
2456 assign(argR, mkU32(uimm12));
2457 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2458 mkexpr(argL), mkexpr(argR)));
2459 if (setCC) {
2460 putIReg32orZR(dd, mkexpr(res));
2461 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2462 DIP("%ss %s, %s, 0x%x\n",
2463 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2464 } else {
2465 putIReg32orSP(dd, mkexpr(res));
2466 DIP("%s %s, %s, 0x%x\n",
2467 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2470 return True;
2474 /* -------------------- ADR/ADRP -------------------- */
2475 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2476 UInt bP = INSN(31,31);
2477 UInt immLo = INSN(30,29);
2478 UInt immHi = INSN(23,5);
2479 UInt rD = INSN(4,0);
2480 ULong uimm = (immHi << 2) | immLo;
2481 ULong simm = sx_to_64(uimm, 21);
2482 ULong val;
2483 if (bP) {
2484 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2485 } else {
2486 val = guest_PC_curr_instr + simm;
2488 putIReg64orZR(rD, mkU64(val));
2489 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2490 return True;
2493 /* -------------------- LOGIC(imm) -------------------- */
2494 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2495 /* 31 30 28 22 21 15 9 4
2496 sf op 100100 N immr imms Rn Rd
2497 op=00: AND Rd|SP, Rn, #imm
2498 op=01: ORR Rd|SP, Rn, #imm
2499 op=10: EOR Rd|SP, Rn, #imm
2500 op=11: ANDS Rd|ZR, Rn, #imm
2502 Bool is64 = INSN(31,31) == 1;
2503 UInt op = INSN(30,29);
2504 UInt N = INSN(22,22);
2505 UInt immR = INSN(21,16);
2506 UInt immS = INSN(15,10);
2507 UInt nn = INSN(9,5);
2508 UInt dd = INSN(4,0);
2509 ULong imm = 0;
2510 Bool ok;
2511 if (N == 1 && !is64)
2512 goto after_logic_imm; /* not allowed; fall through */
2513 ok = dbm_DecodeBitMasks(&imm, NULL,
2514 N, immS, immR, True, is64 ? 64 : 32);
2515 if (!ok)
2516 goto after_logic_imm;
2518 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2519 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2520 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2522 vassert(op < 4);
2523 if (is64) {
2524 IRExpr* argL = getIReg64orZR(nn);
2525 IRExpr* argR = mkU64(imm);
2526 IRTemp res = newTemp(Ity_I64);
2527 assign(res, binop(ops64[op], argL, argR));
2528 if (op < 3) {
2529 putIReg64orSP(dd, mkexpr(res));
2530 DIP("%s %s, %s, 0x%llx\n", names[op],
2531 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2532 } else {
2533 putIReg64orZR(dd, mkexpr(res));
2534 setFlags_LOGIC(True/*is64*/, res);
2535 DIP("%s %s, %s, 0x%llx\n", names[op],
2536 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2538 } else {
2539 IRExpr* argL = getIReg32orZR(nn);
2540 IRExpr* argR = mkU32((UInt)imm);
2541 IRTemp res = newTemp(Ity_I32);
2542 assign(res, binop(ops32[op], argL, argR));
2543 if (op < 3) {
2544 putIReg32orSP(dd, mkexpr(res));
2545 DIP("%s %s, %s, 0x%x\n", names[op],
2546 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2547 } else {
2548 putIReg32orZR(dd, mkexpr(res));
2549 setFlags_LOGIC(False/*!is64*/, res);
2550 DIP("%s %s, %s, 0x%x\n", names[op],
2551 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2554 return True;
2556 after_logic_imm:
2558 /* -------------------- MOV{Z,N,K} -------------------- */
2559 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2560 /* 31 30 28 22 20 4
2561 | | | | | |
2562 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2563 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2564 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2566 Bool is64 = INSN(31,31) == 1;
2567 UInt subopc = INSN(30,29);
2568 UInt hw = INSN(22,21);
2569 UInt imm16 = INSN(20,5);
2570 UInt dd = INSN(4,0);
2571 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2572 /* invalid; fall through */
2573 } else {
2574 ULong imm64 = ((ULong)imm16) << (16 * hw);
2575 if (!is64)
2576 vassert(imm64 < 0x100000000ULL);
2577 switch (subopc) {
2578 case BITS2(1,0): // MOVZ
2579 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2580 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2581 break;
2582 case BITS2(0,0): // MOVN
2583 imm64 = ~imm64;
2584 if (!is64)
2585 imm64 &= 0xFFFFFFFFULL;
2586 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2587 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2588 break;
2589 case BITS2(1,1): // MOVK
2590 /* This is more complex. We are inserting a slice into
2591 the destination register, so we need to have the old
2592 value of it. */
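               /* For example, "movk x0, #0xbeef, lsl 16" has hw=1, so
                  imm64 = 0xBEEF0000 and mask = 0xFFFF0000; the new X0
                  is (old X0 & ~0xFFFF0000) | 0xBEEF0000, that is, only
                  bits 31:16 change. */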
2593 if (is64) {
2594 IRTemp old = newTemp(Ity_I64);
2595 assign(old, getIReg64orZR(dd));
2596 ULong mask = 0xFFFFULL << (16 * hw);
2597 IRExpr* res
2598 = binop(Iop_Or64,
2599 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2600 mkU64(imm64));
2601 putIReg64orZR(dd, res);
2602 DIP("movk %s, 0x%x, lsl %u\n",
2603 nameIReg64orZR(dd), imm16, 16*hw);
2604 } else {
2605 IRTemp old = newTemp(Ity_I32);
2606 assign(old, getIReg32orZR(dd));
2607 vassert(hw <= 1);
2608 UInt mask = ((UInt)0xFFFF) << (16 * hw);
2609 IRExpr* res
2610 = binop(Iop_Or32,
2611 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2612 mkU32((UInt)imm64));
2613 putIReg32orZR(dd, res);
2614 DIP("movk %s, 0x%x, lsl %u\n",
2615 nameIReg32orZR(dd), imm16, 16*hw);
2617 break;
2618 default:
2619 vassert(0);
2621 return True;
2625 /* -------------------- {U,S,}BFM -------------------- */
2626 /* 30 28 22 21 15 9 4
2628 sf 10 100110 N immr imms nn dd
2629 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2630 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2632 sf 00 100110 N immr imms nn dd
2633 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2634 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2636 sf 01 100110 N immr imms nn dd
2637 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2638      BFM Xd, Xn, #immr, #imms   when sf=1, N=1
2639   */
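   /* Worked example: "ubfm x0, x1, #8, #15" (preferred alias
      "ubfx x0, x1, #8, #8") has opc=10, N=1, immr=8, imms=15.
      DecodeBitMasks then gives wmask = 0xFF000000000000FF and
      tmask = 0xFF.  With dst = 0 (inZero) and no extension, the code
      below computes res = (ROR64(X1, 8) & wmask) & tmask, which puts
      bits 15:8 of X1 into bits 7:0 of X0 and zeroes everything else. */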
2640 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2641 UInt sf = INSN(31,31);
2642 UInt opc = INSN(30,29);
2643 UInt N = INSN(22,22);
2644 UInt immR = INSN(21,16);
2645 UInt immS = INSN(15,10);
2646 UInt nn = INSN(9,5);
2647 UInt dd = INSN(4,0);
2648 Bool inZero = False;
2649 Bool extend = False;
2650 const HChar* nm = "???";
2651 /* skip invalid combinations */
2652 switch (opc) {
2653 case BITS2(0,0):
2654 inZero = True; extend = True; nm = "sbfm"; break;
2655 case BITS2(0,1):
2656 inZero = False; extend = False; nm = "bfm"; break;
2657 case BITS2(1,0):
2658 inZero = True; extend = False; nm = "ubfm"; break;
2659 case BITS2(1,1):
2660 goto after_bfm; /* invalid */
2661 default:
2662 vassert(0);
2664 if (sf == 1 && N != 1) goto after_bfm;
2665 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2666 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2667 ULong wmask = 0, tmask = 0;
2668 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2669 N, immS, immR, False, sf == 1 ? 64 : 32);
2670 if (!ok) goto after_bfm; /* hmmm */
2672 Bool is64 = sf == 1;
2673 IRType ty = is64 ? Ity_I64 : Ity_I32;
2675 IRTemp dst = newTemp(ty);
2676 IRTemp src = newTemp(ty);
2677 IRTemp bot = newTemp(ty);
2678 IRTemp top = newTemp(ty);
2679 IRTemp res = newTemp(ty);
2680 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2681 assign(src, getIRegOrZR(is64, nn));
2682 /* perform bitfield move on low bits */
2683 assign(bot, binop(mkOR(ty),
2684 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2685 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2686 mkU(ty, wmask))));
2687 /* determine extension bits (sign, zero or dest register) */
2688 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2689 /* combine extension bits and result bits */
2690 assign(res, binop(mkOR(ty),
2691 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2692 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2693 putIRegOrZR(is64, dd, mkexpr(res));
2694 DIP("%s %s, %s, immR=%u, immS=%u\n",
2695 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2696 return True;
2698 after_bfm:
2700 /* ---------------------- EXTR ---------------------- */
2701 /* 30 28 22 20 15 9 4
2702 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2703 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2705 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2706 Bool is64 = INSN(31,31) == 1;
2707 UInt mm = INSN(20,16);
2708 UInt imm6 = INSN(15,10);
2709 UInt nn = INSN(9,5);
2710 UInt dd = INSN(4,0);
2711 Bool valid = True;
2712 if (INSN(31,31) != INSN(22,22))
2713 valid = False;
2714 if (!is64 && imm6 >= 32)
2715 valid = False;
2716 if (!valid) goto after_extr;
2717 IRType ty = is64 ? Ity_I64 : Ity_I32;
2718 IRTemp srcHi = newTemp(ty);
2719 IRTemp srcLo = newTemp(ty);
2720 IRTemp res = newTemp(ty);
2721 assign(srcHi, getIRegOrZR(is64, nn));
2722 assign(srcLo, getIRegOrZR(is64, mm));
2723 if (imm6 == 0) {
2724 assign(res, mkexpr(srcLo));
2725 } else {
2726 UInt szBits = 8 * sizeofIRType(ty);
2727 vassert(imm6 > 0 && imm6 < szBits);
2728 assign(res, binop(mkOR(ty),
2729 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2730 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2732 putIRegOrZR(is64, dd, mkexpr(res));
2733 DIP("extr %s, %s, %s, #%u\n",
2734 nameIRegOrZR(is64,dd),
2735 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2736 return True;
2738 after_extr:
2740 vex_printf("ARM64 front end: data_processing_immediate\n");
2741 return False;
2742 # undef INSN
2746 /*------------------------------------------------------------*/
2747 /*--- Data processing (register) instructions ---*/
2748 /*------------------------------------------------------------*/
2750 static const HChar* nameSH ( UInt sh ) {
2751 switch (sh) {
2752 case 0: return "lsl";
2753 case 1: return "lsr";
2754 case 2: return "asr";
2755 case 3: return "ror";
2756 default: vassert(0);
2760 /* Generate IR to get a register value, possibly shifted by an
2761 immediate. Returns either a 32- or 64-bit temporary holding the
2762 result. After the shift, the value can optionally be NOT-ed
2763 too.
2765 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2766 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2767      isn't allowed, but it's the job of the caller to check that.
2768   */
2769 static IRTemp getShiftedIRegOrZR ( Bool is64,
2770 UInt sh_how, UInt sh_amt, UInt regNo,
2771 Bool invert )
2773 vassert(sh_how < 4);
2774 vassert(sh_amt < (is64 ? 64 : 32));
2775 IRType ty = is64 ? Ity_I64 : Ity_I32;
2776 IRTemp t0 = newTemp(ty);
2777 assign(t0, getIRegOrZR(is64, regNo));
2778 IRTemp t1 = newTemp(ty);
2779 switch (sh_how) {
2780 case BITS2(0,0):
2781 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2782 break;
2783 case BITS2(0,1):
2784 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2785 break;
2786 case BITS2(1,0):
2787 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2788 break;
2789 case BITS2(1,1):
2790 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2791 break;
2792 default:
2793 vassert(0);
2795 if (invert) {
2796 IRTemp t2 = newTemp(ty);
2797 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2798 return t2;
2799 } else {
2800 return t1;
2805 static
2806 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2807 UInt insn)
2809 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2811 /* ------------------- ADD/SUB(reg) ------------------- */
2812 /* x==0 => 32 bit op x==1 => 64 bit op
2813 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2815 31 30 29 28 23 21 20 15 9 4
2816 | | | | | | | | | |
2817 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2818 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2819 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2820 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2822 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2823 UInt bX = INSN(31,31);
2824 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2825 UInt bS = INSN(29, 29); /* set flags? */
2826 UInt sh = INSN(23,22);
2827 UInt rM = INSN(20,16);
2828 UInt imm6 = INSN(15,10);
2829 UInt rN = INSN(9,5);
2830 UInt rD = INSN(4,0);
2831 Bool isSUB = bOP == 1;
2832 Bool is64 = bX == 1;
2833 IRType ty = is64 ? Ity_I64 : Ity_I32;
2834 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2835 /* invalid; fall through */
2836 } else {
2837 IRTemp argL = newTemp(ty);
2838 assign(argL, getIRegOrZR(is64, rN));
2839 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2840 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2841 IRTemp res = newTemp(ty);
2842 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2843 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2844 if (bS) {
2845 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2847 DIP("%s%s %s, %s, %s, %s #%u\n",
2848 bOP ? "sub" : "add", bS ? "s" : "",
2849 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2850 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2851 return True;
2855 /* ------------------- ADC/SBC(reg) ------------------- */
2856 /* x==0 => 32 bit op x==1 => 64 bit op
2858 31 30 29 28 23 21 20 15 9 4
2859 | | | | | | | | | |
2860 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2861 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2862 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2863 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2866 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2867 UInt bX = INSN(31,31);
2868 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2869 UInt bS = INSN(29,29); /* set flags */
2870 UInt rM = INSN(20,16);
2871 UInt rN = INSN(9,5);
2872 UInt rD = INSN(4,0);
2874 Bool isSUB = bOP == 1;
2875 Bool is64 = bX == 1;
2876 IRType ty = is64 ? Ity_I64 : Ity_I32;
2878 IRTemp oldC = newTemp(ty);
2879 assign(oldC,
2880 is64 ? mk_arm64g_calculate_flag_c()
2881 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2883 IRTemp argL = newTemp(ty);
2884 assign(argL, getIRegOrZR(is64, rN));
2885 IRTemp argR = newTemp(ty);
2886 assign(argR, getIRegOrZR(is64, rM));
2888 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2889 IRTemp res = newTemp(ty);
2890 if (isSUB) {
2891 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2892 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2893 assign(res,
2894 binop(op,
2895 binop(op, mkexpr(argL), mkexpr(argR)),
2896 binop(xorOp, mkexpr(oldC), one)));
2897 } else {
2898 assign(res,
2899 binop(op,
2900 binop(op, mkexpr(argL), mkexpr(argR)),
2901 mkexpr(oldC)));
2904 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2906 if (bS) {
2907 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2910 DIP("%s%s %s, %s, %s\n",
2911 bOP ? "sbc" : "adc", bS ? "s" : "",
2912 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2913 nameIRegOrZR(is64, rM));
2914 return True;
2917 /* -------------------- LOGIC(reg) -------------------- */
2918 /* x==0 => 32 bit op x==1 => 64 bit op
2919 N==0 => inv? is no-op (no inversion)
2920 N==1 => inv? is NOT
2921 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2923 31 30 28 23 21 20 15 9 4
2924 | | | | | | | | |
2925 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2926 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2927 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2928 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2929 With N=1, the names are: BIC ORN EON BICS
2931 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2932 UInt bX = INSN(31,31);
2933 UInt sh = INSN(23,22);
2934 UInt bN = INSN(21,21);
2935 UInt rM = INSN(20,16);
2936 UInt imm6 = INSN(15,10);
2937 UInt rN = INSN(9,5);
2938 UInt rD = INSN(4,0);
2939 Bool is64 = bX == 1;
2940 IRType ty = is64 ? Ity_I64 : Ity_I32;
2941 if (!is64 && imm6 > 31) {
2942          /* invalid; fall through */
2943 } else {
2944 IRTemp argL = newTemp(ty);
2945 assign(argL, getIRegOrZR(is64, rN));
2946 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2947 IROp op = Iop_INVALID;
2948 switch (INSN(30,29)) {
2949 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2950 case BITS2(0,1): op = mkOR(ty); break;
2951 case BITS2(1,0): op = mkXOR(ty); break;
2952 default: vassert(0);
2954 IRTemp res = newTemp(ty);
2955 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2956 if (INSN(30,29) == BITS2(1,1)) {
2957 setFlags_LOGIC(is64, res);
2959 putIRegOrZR(is64, rD, mkexpr(res));
2961 static const HChar* names_op[8]
2962 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2963 vassert(((bN << 2) | INSN(30,29)) < 8);
2964 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2965 /* Special-case the printing of "MOV" */
2966 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2967 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2968 nameIRegOrZR(is64, rM));
2969 } else {
2970 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2971 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2972 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2974 return True;
2978 /* -------------------- {U,S}MULH -------------------- */
2979 /* 31 23 22 20 15 9 4
2980 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2981 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2983 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2984 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2985 Bool isU = INSN(23,23) == 1;
2986 UInt mm = INSN(20,16);
2987 UInt nn = INSN(9,5);
2988 UInt dd = INSN(4,0);
2989 putIReg64orZR(dd, unop(Iop_128HIto64,
2990 binop(isU ? Iop_MullU64 : Iop_MullS64,
2991 getIReg64orZR(nn), getIReg64orZR(mm))));
2992 DIP("%cmulh %s, %s, %s\n",
2993 isU ? 'u' : 's',
2994 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2995 return True;
2998 /* -------------------- M{ADD,SUB} -------------------- */
2999 /* 31 30 20 15 14 9 4
3000 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
3001       sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
3003 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
3004 Bool is64 = INSN(31,31) == 1;
3005 UInt mm = INSN(20,16);
3006 Bool isAdd = INSN(15,15) == 0;
3007 UInt aa = INSN(14,10);
3008 UInt nn = INSN(9,5);
3009 UInt dd = INSN(4,0);
3010 if (is64) {
3011          putIReg64orZR(
3012             dd,
3013 binop(isAdd ? Iop_Add64 : Iop_Sub64,
3014 getIReg64orZR(aa),
3015 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3016 } else {
3017          putIReg32orZR(
3018             dd,
3019 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3020 getIReg32orZR(aa),
3021 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3023 DIP("%s %s, %s, %s, %s\n",
3024 isAdd ? "madd" : "msub",
3025 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3026 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3027 return True;
3030 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3031 /* 31 30 28 20 15 11 9 4
3032 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3033 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3034 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3035 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3036 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3038 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3039 Bool is64 = INSN(31,31) == 1;
3040 UInt b30 = INSN(30,30);
3041 UInt mm = INSN(20,16);
3042 UInt cond = INSN(15,12);
3043 UInt b10 = INSN(10,10);
3044 UInt nn = INSN(9,5);
3045 UInt dd = INSN(4,0);
3046 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3047 IRType ty = is64 ? Ity_I64 : Ity_I32;
3048 IRExpr* argL = getIRegOrZR(is64, nn);
3049 IRExpr* argR = getIRegOrZR(is64, mm);
3050 switch (op) {
3051 case BITS2(0,0):
3052 break;
3053 case BITS2(0,1):
3054 argR = binop(mkADD(ty), argR, mkU(ty,1));
3055 break;
3056 case BITS2(1,0):
3057 argR = unop(mkNOT(ty), argR);
3058 break;
3059 case BITS2(1,1):
3060 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3061 break;
3062 default:
3063 vassert(0);
3065 putIRegOrZR(
3066 is64, dd,
3067 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3068 argL, argR)
3070 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3071 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3072 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3073 nameIRegOrZR(is64, mm), nameCC(cond));
3074 return True;
3077 /* -------------- ADD/SUB(extended reg) -------------- */
3078 /* 28 20 15 12 9 4
3079 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3080 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3082 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3083 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3085 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3086 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3088 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3089 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3091 The 'm' operand is extended per opt, thusly:
3093 000 Xm & 0xFF UXTB
3094 001 Xm & 0xFFFF UXTH
3095 010 Xm & (2^32)-1 UXTW
3096 011 Xm UXTX
3098 100 Xm sx from bit 7 SXTB
3099 101 Xm sx from bit 15 SXTH
3100 110 Xm sx from bit 31 SXTW
3101 111 Xm SXTX
3103 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3104 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3105 are the identity operation on Wm.
3107 After extension, the value is shifted left by imm3 bits, which
3108      may only be in the range 0 .. 4 inclusive.
3109   */
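   /* For example, "add x3, x1, w2, uxtb #2" has opt=000 and imm3=2, so
      the IR below computes X3 = X1 + ((X2 & 0xFF) << 2) with no flag
      update; the "adds" form would additionally call setFlags_ADD_SUB
      on the 64-bit operands. */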
3110 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3111 Bool is64 = INSN(31,31) == 1;
3112 Bool isSub = INSN(30,30) == 1;
3113 Bool setCC = INSN(29,29) == 1;
3114 UInt mm = INSN(20,16);
3115 UInt opt = INSN(15,13);
3116 UInt imm3 = INSN(12,10);
3117 UInt nn = INSN(9,5);
3118 UInt dd = INSN(4,0);
3119 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3120 "sxtb", "sxth", "sxtw", "sxtx" };
3121 /* Do almost the same thing in the 32- and 64-bit cases. */
3122 IRTemp xN = newTemp(Ity_I64);
3123 IRTemp xM = newTemp(Ity_I64);
3124 assign(xN, getIReg64orSP(nn));
3125 assign(xM, getIReg64orZR(mm));
3126 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3127 Int shSX = 0;
3128 /* widen Xm .. */
3129 switch (opt) {
3130 case BITS3(0,0,0): // UXTB
3131 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3132 case BITS3(0,0,1): // UXTH
3133 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3134 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3135 if (is64) {
3136 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3138 break;
3139 case BITS3(0,1,1): // UXTX -- always a noop
3140 break;
3141 case BITS3(1,0,0): // SXTB
3142 shSX = 56; goto sxTo64;
3143 case BITS3(1,0,1): // SXTH
3144 shSX = 48; goto sxTo64;
3145 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3146 if (is64) {
3147 shSX = 32; goto sxTo64;
3149 break;
3150 case BITS3(1,1,1): // SXTX -- always a noop
3151 break;
3152 sxTo64:
3153 vassert(shSX >= 32);
3154 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3155 mkU8(shSX));
3156 break;
3157 default:
3158 vassert(0);
3160 /* and now shift */
3161 IRTemp argL = xN;
3162 IRTemp argR = newTemp(Ity_I64);
3163 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3164 IRTemp res = newTemp(Ity_I64);
3165 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3166 mkexpr(argL), mkexpr(argR)));
3167 if (is64) {
3168 if (setCC) {
3169 putIReg64orZR(dd, mkexpr(res));
3170 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3171 } else {
3172 putIReg64orSP(dd, mkexpr(res));
3174 } else {
3175 if (setCC) {
3176 IRTemp argL32 = newTemp(Ity_I32);
3177 IRTemp argR32 = newTemp(Ity_I32);
3178 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3179 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3180 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3181 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3182 } else {
3183 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3186 DIP("%s%s %s, %s, %s %s lsl %u\n",
3187 isSub ? "sub" : "add", setCC ? "s" : "",
3188 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3189 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3190 nameExt[opt], imm3);
3191 return True;
3194 /* ---------------- CCMP/CCMN(imm) ---------------- */
3195 /* Bizarrely, these appear in the "data processing register"
3196 category, even though they are operations against an
3197 immediate. */
3198 /* 31 29 20 15 11 9 3
3199 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3200 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3202 Operation is:
3203 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3204      (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3205   */
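   /* For example, "ccmp x1, #3, #8, eq" sets the flags as if from
      SUB64(X1, 3) when the EQ condition currently holds, and otherwise
      sets NZCV to 1000 (just N set); that is exactly what
      setFlags_ADD_SUB_conditionally generates below. */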
3206 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3207 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3208 Bool is64 = INSN(31,31) == 1;
3209 Bool isSUB = INSN(30,30) == 1;
3210 UInt imm5 = INSN(20,16);
3211 UInt cond = INSN(15,12);
3212 UInt nn = INSN(9,5);
3213 UInt nzcv = INSN(3,0);
3215 IRTemp condT = newTemp(Ity_I1);
3216 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3218 IRType ty = is64 ? Ity_I64 : Ity_I32;
3219 IRTemp argL = newTemp(ty);
3220 IRTemp argR = newTemp(ty);
3222 if (is64) {
3223 assign(argL, getIReg64orZR(nn));
3224 assign(argR, mkU64(imm5));
3225 } else {
3226 assign(argL, getIReg32orZR(nn));
3227 assign(argR, mkU32(imm5));
3229 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3231 DIP("ccm%c %s, #%u, #%u, %s\n",
3232 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3233 imm5, nzcv, nameCC(cond));
3234 return True;
3237 /* ---------------- CCMP/CCMN(reg) ---------------- */
3238 /* 31 29 20 15 11 9 3
3239 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3240 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3241 Operation is:
3242 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3243 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3245 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3246 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3247 Bool is64 = INSN(31,31) == 1;
3248 Bool isSUB = INSN(30,30) == 1;
3249 UInt mm = INSN(20,16);
3250 UInt cond = INSN(15,12);
3251 UInt nn = INSN(9,5);
3252 UInt nzcv = INSN(3,0);
3254 IRTemp condT = newTemp(Ity_I1);
3255 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3257 IRType ty = is64 ? Ity_I64 : Ity_I32;
3258 IRTemp argL = newTemp(ty);
3259 IRTemp argR = newTemp(ty);
3261 if (is64) {
3262 assign(argL, getIReg64orZR(nn));
3263 assign(argR, getIReg64orZR(mm));
3264 } else {
3265 assign(argL, getIReg32orZR(nn));
3266 assign(argR, getIReg32orZR(mm));
3268 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3270 DIP("ccm%c %s, %s, #%u, %s\n",
3271 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3272 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3273 return True;
3277 /* -------------- REV/REV16/REV32/RBIT -------------- */
3278 /* 31 30 28 20 15 11 9 4
3280 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3281 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3283 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3284 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3286 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3287 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3289 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3291 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3292 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3293 UInt b31 = INSN(31,31);
3294 UInt opc = INSN(11,10);
3296 UInt ix = 0;
3297 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3298 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3299 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3300 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3301 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3302 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3303 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3304 if (ix >= 1 && ix <= 7) {
3305 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3306 UInt nn = INSN(9,5);
3307 UInt dd = INSN(4,0);
3308 IRTemp src = newTemp(Ity_I64);
3309 IRTemp dst = IRTemp_INVALID;
3310 IRTemp (*math)(IRTemp) = NULL;
3311 switch (ix) {
3312 case 1: case 2: math = math_BYTESWAP64; break;
3313 case 3: case 4: math = math_BITSWAP64; break;
3314 case 5: case 6: math = math_USHORTSWAP64; break;
3315 case 7: math = math_UINTSWAP64; break;
3316 default: vassert(0);
3318 const HChar* names[7]
3319 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3320 const HChar* nm = names[ix-1];
3321 vassert(math);
3322 if (ix == 6) {
3323 /* This has to be special cased, since the logic below doesn't
3324 handle it correctly. */
3325 assign(src, getIReg64orZR(nn));
3326 dst = math(src);
3327 putIReg64orZR(dd,
3328 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3329 } else if (is64) {
3330 assign(src, getIReg64orZR(nn));
3331 dst = math(src);
3332 putIReg64orZR(dd, mkexpr(dst));
3333 } else {
3334 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3335 dst = math(src);
3336 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3338 DIP("%s %s, %s\n", nm,
3339 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3340 return True;
3342 /* else fall through */
3345 /* -------------------- CLZ/CLS -------------------- */
3346 /* 30 28 24 20 15 9 4
3347 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3348 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3350 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3351 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3352 Bool is64 = INSN(31,31) == 1;
3353 Bool isCLS = INSN(10,10) == 1;
3354 UInt nn = INSN(9,5);
3355 UInt dd = INSN(4,0);
3356 IRTemp src = newTemp(Ity_I64);
3357 IRTemp srcZ = newTemp(Ity_I64);
3358 IRTemp dst = newTemp(Ity_I64);
3359 /* Get the argument, widened out to 64 bit */
3360 if (is64) {
3361 assign(src, getIReg64orZR(nn));
3362 } else {
3363 assign(src, binop(Iop_Shl64,
3364 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3366 /* If this is CLS, mash the arg around accordingly */
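      /* The XOR below computes srcZ = (src << 1) ^ ((src >>u 1) << 1),
         so, for i >= 1, bit i of srcZ is src[i] ^ src[i-1], and bit 0
         is 0.  The number of leading zeroes of srcZ is therefore the
         number of bits immediately below the top bit that are copies
         of it, which is what CLS counts; the all-bits-equal case
         (srcZ == 0) is handled by the ITE further down. */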
3367 if (isCLS) {
3368 IRExpr* one = mkU8(1);
3369 assign(srcZ,
3370 binop(Iop_Xor64,
3371 binop(Iop_Shl64, mkexpr(src), one),
3372 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3373 } else {
3374 assign(srcZ, mkexpr(src));
3376 /* And compute CLZ. */
3377 if (is64) {
3378 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3379 mkU64(isCLS ? 63 : 64),
3380 unop(Iop_Clz64, mkexpr(srcZ))));
3381 putIReg64orZR(dd, mkexpr(dst));
3382 } else {
3383 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3384 mkU64(isCLS ? 31 : 32),
3385 unop(Iop_Clz64, mkexpr(srcZ))));
3386 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3388 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3389 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3390 return True;
3393 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3394 /* 30 28 20 15 11 9 4
3395 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3396 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3397 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3398 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3400 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3401 && INSN(15,12) == BITS4(0,0,1,0)) {
3402 Bool is64 = INSN(31,31) == 1;
3403 UInt mm = INSN(20,16);
3404 UInt op = INSN(11,10);
3405 UInt nn = INSN(9,5);
3406 UInt dd = INSN(4,0);
3407 IRType ty = is64 ? Ity_I64 : Ity_I32;
3408 IRTemp srcL = newTemp(ty);
3409 IRTemp srcR = newTemp(Ity_I64);
3410 IRTemp res = newTemp(ty);
3411 IROp iop = Iop_INVALID;
3412 assign(srcL, getIRegOrZR(is64, nn));
3413 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3414 mkU64(is64 ? 63 : 31)));
3415 if (op < 3) {
3416 // LSLV, LSRV, ASRV
3417 switch (op) {
3418 case BITS2(0,0): iop = mkSHL(ty); break;
3419 case BITS2(0,1): iop = mkSHR(ty); break;
3420 case BITS2(1,0): iop = mkSAR(ty); break;
3421 default: vassert(0);
3423 assign(res, binop(iop, mkexpr(srcL),
3424 unop(Iop_64to8, mkexpr(srcR))));
3425 } else {
3426 // RORV
3427 IROp opSHL = mkSHL(ty);
3428 IROp opSHR = mkSHR(ty);
3429 IROp opOR = mkOR(ty);
3430 IRExpr* width = mkU64(is64 ? 64: 32);
3431 assign(
3432 res,
3433 IRExpr_ITE(
3434 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3435 mkexpr(srcL),
3436 binop(opOR,
3437 binop(opSHL,
3438 mkexpr(srcL),
3439 unop(Iop_64to8, binop(Iop_Sub64, width,
3440 mkexpr(srcR)))),
3441 binop(opSHR,
3442 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3445 putIRegOrZR(is64, dd, mkexpr(res));
3446 vassert(op < 4);
3447 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3448 DIP("%s %s, %s, %s\n",
3449 names[op], nameIRegOrZR(is64,dd),
3450 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3451 return True;
3454 /* -------------------- SDIV/UDIV -------------------- */
3455 /* 30 28 20 15 10 9 4
3456 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3457 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3459 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3460 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3461 Bool is64 = INSN(31,31) == 1;
3462 UInt mm = INSN(20,16);
3463 Bool isS = INSN(10,10) == 1;
3464 UInt nn = INSN(9,5);
3465 UInt dd = INSN(4,0);
3466 if (isS) {
3467 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3468 getIRegOrZR(is64, nn),
3469 getIRegOrZR(is64, mm)));
3470 } else {
3471 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3472 getIRegOrZR(is64, nn),
3473 getIRegOrZR(is64, mm)));
3475 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3476 nameIRegOrZR(is64, dd),
3477 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3478 return True;
3481 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3482 /* 31 23 20 15 14 9 4
3483 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3484 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3485 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3486 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3487 with operation
3488 Xd = Xa +/- (Wn *u/s Wm)
3490 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3491 Bool isU = INSN(23,23) == 1;
3492 UInt mm = INSN(20,16);
3493 Bool isAdd = INSN(15,15) == 0;
3494 UInt aa = INSN(14,10);
3495 UInt nn = INSN(9,5);
3496 UInt dd = INSN(4,0);
3497 IRTemp wN = newTemp(Ity_I32);
3498 IRTemp wM = newTemp(Ity_I32);
3499 IRTemp xA = newTemp(Ity_I64);
3500 IRTemp muld = newTemp(Ity_I64);
3501 IRTemp res = newTemp(Ity_I64);
3502 assign(wN, getIReg32orZR(nn));
3503 assign(wM, getIReg32orZR(mm));
3504 assign(xA, getIReg64orZR(aa));
3505 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3506 mkexpr(wN), mkexpr(wM)));
3507 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3508 mkexpr(xA), mkexpr(muld)));
3509 putIReg64orZR(dd, mkexpr(res));
3510 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3511 nameIReg64orZR(dd), nameIReg32orZR(nn),
3512 nameIReg32orZR(mm), nameIReg64orZR(aa));
3513 return True;
3516 /* -------------------- CRC32/CRC32C -------------------- */
3517 /* 31 30 20 15 11 9 4
3518 sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
3519 sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
3521 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3522 && INSN(15,13) == BITS3(0,1,0)) {
3523 UInt bitSF = INSN(31,31);
3524 UInt mm = INSN(20,16);
3525 UInt bitC = INSN(12,12);
3526 UInt sz = INSN(11,10);
3527 UInt nn = INSN(9,5);
3528 UInt dd = INSN(4,0);
3529 vassert(sz >= 0 && sz <= 3);
3530 if ((bitSF == 0 && sz <= BITS2(1,0))
3531 || (bitSF == 1 && sz == BITS2(1,1))) {
3532 UInt ix = (bitC == 1 ? 4 : 0) | sz;
3533 void* helpers[8]
3534 = { &arm64g_calc_crc32b, &arm64g_calc_crc32h,
3535 &arm64g_calc_crc32w, &arm64g_calc_crc32x,
3536 &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
3537 &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
3538 const HChar* hNames[8]
3539 = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
3540 "arm64g_calc_crc32w", "arm64g_calc_crc32x",
3541 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3542 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3543 const HChar* iNames[8]
3544 = { "crc32b", "crc32h", "crc32w", "crc32x",
3545 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3547 IRTemp srcN = newTemp(Ity_I64);
3548 assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
3550 IRTemp srcM = newTemp(Ity_I64);
3551 IRExpr* at64 = getIReg64orZR(mm);
3552 switch (sz) {
3553 case BITS2(0,0):
3554 assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
3555 case BITS2(0,1):
3556 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
3557 case BITS2(1,0):
3558 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
3559 case BITS2(1,1):
3560 assign(srcM, at64); break;
3561 default:
3562 vassert(0);
3565 vassert(ix >= 0 && ix <= 7);
3567          putIReg64orZR(
3568             dd,
3569 unop(Iop_32Uto64,
3570 unop(Iop_64to32,
3571 mkIRExprCCall(Ity_I64, 0/*regparm*/,
3572 hNames[ix], helpers[ix],
3573 mkIRExprVec_2(mkexpr(srcN),
3574 mkexpr(srcM))))));
3576 DIP("%s %s, %s, %s\n", iNames[ix],
3577 nameIReg32orZR(dd),
3578 nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
3579 return True;
3581 /* fall through */
3584 vex_printf("ARM64 front end: data_processing_register\n");
3585 return False;
3586 # undef INSN
3590 /*------------------------------------------------------------*/
3591 /*--- Math helpers for vector interleave/deinterleave ---*/
3592 /*------------------------------------------------------------*/
3594 #define EX(_tmp) \
3595 mkexpr(_tmp)
3596 #define SL(_hi128,_lo128,_nbytes) \
3597 ( (_nbytes) == 0 \
3598 ? (_lo128) \
3599 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3600 #define ROR(_v128,_nbytes) \
3601 SL((_v128),(_v128),(_nbytes))
3602 #define ROL(_v128,_nbytes) \
3603 SL((_v128),(_v128),16-(_nbytes))
3604 #define SHR(_v128,_nbytes) \
3605 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3606 #define SHL(_v128,_nbytes) \
3607 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3608 #define ILO64x2(_argL,_argR) \
3609 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3610 #define IHI64x2(_argL,_argR) \
3611 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3612 #define ILO32x4(_argL,_argR) \
3613 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3614 #define IHI32x4(_argL,_argR) \
3615 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3616 #define ILO16x8(_argL,_argR) \
3617 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3618 #define IHI16x8(_argL,_argR) \
3619 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3620 #define ILO8x16(_argL,_argR) \
3621 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3622 #define IHI8x16(_argL,_argR) \
3623 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3624 #define CEV32x4(_argL,_argR) \
3625 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3626 #define COD32x4(_argL,_argR) \
3627 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3628 #define COD16x8(_argL,_argR) \
3629 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3630 #define COD8x16(_argL,_argR) \
3631 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3632 #define CEV8x16(_argL,_argR) \
3633 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3634 #define AND(_arg1,_arg2) \
3635 binop(Iop_AndV128,(_arg1),(_arg2))
3636 #define OR2(_arg1,_arg2) \
3637 binop(Iop_OrV128,(_arg1),(_arg2))
3638 #define OR3(_arg1,_arg2,_arg3) \
3639 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3640 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3641 binop(Iop_OrV128, \
3642 binop(Iop_OrV128,(_arg1),(_arg2)), \
3643 binop(Iop_OrV128,(_arg3),(_arg4)))
3646 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3647 static
3648 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3649 UInt laneSzBlg2, IRTemp u0 )
3651 assign(*i0, mkexpr(u0));
3655 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3656 static
3657 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3658 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3660 /* This is pretty easy, since we have primitives directly to
3661 hand. */
3662 if (laneSzBlg2 == 3) {
3663 // 64x2
3664 // u1 == B1 B0, u0 == A1 A0
3665 // i1 == B1 A1, i0 == B0 A0
3666 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3667 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3668 return;
3670 if (laneSzBlg2 == 2) {
3671 // 32x4
3672 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3673 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3674 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3675 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3676 return;
3678 if (laneSzBlg2 == 1) {
3679 // 16x8
3680 // u1 == B{7..0}, u0 == A{7..0}
3681 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3682 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3683 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3684 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3685 return;
3687 if (laneSzBlg2 == 0) {
3688 // 8x16
3689 // u1 == B{f..0}, u0 == A{f..0}
3690 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3691 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3692 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3693 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3694 return;
3696 /*NOTREACHED*/
3697 vassert(0);
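/* Note (added for exposition): for an STn of n vectors, the lanes
   must end up in memory in the order
      A0 B0 .. A1 B1 ..  and so on
   that is, lane k of every source register, then lane k+1 of every
   source register, etc.  The INTERLEAVEn helpers compute i0, i1, ..
   as the consecutive 128-bit chunks of that sequence, with the lowest
   addressed chunk in i0.  The per-case layout comments show exactly
   that, written with the highest lane on the left. */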
3701 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3702 static
3703 void math_INTERLEAVE3_128(
3704 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3705 UInt laneSzBlg2,
3706 IRTemp u0, IRTemp u1, IRTemp u2 )
3708 if (laneSzBlg2 == 3) {
3709 // 64x2
3710 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3711 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3712 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3713 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3714 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3715 return;
3718 if (laneSzBlg2 == 2) {
3719 // 32x4
3720 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3721 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3722 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3723 IRTemp p0 = newTempV128();
3724 IRTemp p1 = newTempV128();
3725 IRTemp p2 = newTempV128();
3726 IRTemp c1100 = newTempV128();
3727 IRTemp c0011 = newTempV128();
3728 IRTemp c0110 = newTempV128();
3729 assign(c1100, mkV128(0xFF00));
3730 assign(c0011, mkV128(0x00FF));
3731 assign(c0110, mkV128(0x0FF0));
3732 // First interleave them at 64x2 granularity,
3733 // generating partial ("p") values.
3734 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3735 // And more shuffling around for the final answer
3736 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3737 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3738 assign(*i1, OR3( SHL(EX(p2),12),
3739 AND(EX(p1),EX(c0110)),
3740 SHR(EX(p0),12) ));
3741 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3742 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3743 return;
3746 if (laneSzBlg2 == 1) {
3747 // 16x8
3748 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3749 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3750 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3752 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3753 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3754 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3756 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3757 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3758 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3759 IRTemp p0 = newTempV128();
3760 IRTemp p1 = newTempV128();
3761 IRTemp p2 = newTempV128();
3762 IRTemp c1000 = newTempV128();
3763 IRTemp c0100 = newTempV128();
3764 IRTemp c0010 = newTempV128();
3765 IRTemp c0001 = newTempV128();
3766 assign(c1000, mkV128(0xF000));
3767 assign(c0100, mkV128(0x0F00));
3768 assign(c0010, mkV128(0x00F0));
3769 assign(c0001, mkV128(0x000F));
3770 // First interleave them at 32x4 granularity,
3771 // generating partial ("p") values.
3772 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3773 // And more shuffling around for the final answer
3774 assign(*i2,
3775 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3776 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3777 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3778 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3780 assign(*i1,
3781 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3782 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3783 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3784 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3786 assign(*i0,
3787 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3788 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3789 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3790 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3792 return;
3795 if (laneSzBlg2 == 0) {
3796 // 8x16. It doesn't seem worth the hassle of first doing a
3797 // 16x8 interleave, so just generate all 24 partial results
3798 // directly :-(
3799 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3800 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3801 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3802 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3804 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3805 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3806 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3807 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3808 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3809 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3810 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3811 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3812 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3814 // eg XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
3815 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3817 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3818 IRTemp t_##_tempName = newTempV128(); \
3819 assign(t_##_tempName, \
3820 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3821 ROR(EX(_srcVec2),(_srcShift2)) ) )
3823 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3824 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3826 // The slicing and reassembly are interleaved as much as possible,
3827 // so as to minimise the demand for registers in the back end, which
3828 // was observed to be a problem in testing.
3830 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3831 XXXX(AfCe, AA, 0xf, CC, 0xe);
3832 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3834 XXXX(BeAe, BB, 0xe, AA, 0xe);
3835 XXXX(CdBd, CC, 0xd, BB, 0xd);
3836 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3837 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3839 XXXX(AdCc, AA, 0xd, CC, 0xc);
3840 XXXX(BcAc, BB, 0xc, AA, 0xc);
3841 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3843 XXXX(CbBb, CC, 0xb, BB, 0xb);
3844 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3845 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3846 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3847 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3849 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3850 XXXX(C9B9, CC, 0x9, BB, 0x9);
3851 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3853 XXXX(A9C8, AA, 0x9, CC, 0x8);
3854 XXXX(B8A8, BB, 0x8, AA, 0x8);
3855 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3856 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3858 XXXX(C7B7, CC, 0x7, BB, 0x7);
3859 XXXX(A7C6, AA, 0x7, CC, 0x6);
3860 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3862 XXXX(B6A6, BB, 0x6, AA, 0x6);
3863 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3864 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3865 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3866 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3868 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3869 XXXX(B4A4, BB, 0x4, AA, 0x4);
3870 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3872 XXXX(C3B3, CC, 0x3, BB, 0x3);
3873 XXXX(A3C2, AA, 0x3, CC, 0x2);
3874 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3875 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3877 XXXX(B2A2, BB, 0x2, AA, 0x2);
3878 XXXX(C1B1, CC, 0x1, BB, 0x1);
3879 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3881 XXXX(A1C0, AA, 0x1, CC, 0x0);
3882 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3883 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3884 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3885 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3887 # undef XXXX
3888 return;
3891 /*NOTREACHED*/
3892 vassert(0);
3896 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3897 static
3898 void math_INTERLEAVE4_128(
3899 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3900 UInt laneSzBlg2,
3901 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3903 if (laneSzBlg2 == 3) {
3904 // 64x2
3905 assign(*i0, ILO64x2(EX(u1), EX(u0)));
3906 assign(*i1, ILO64x2(EX(u3), EX(u2)));
3907 assign(*i2, IHI64x2(EX(u1), EX(u0)));
3908 assign(*i3, IHI64x2(EX(u3), EX(u2)));
3909 return;
3911 if (laneSzBlg2 == 2) {
3912 // 32x4
3913 // First, interleave at the 64-bit lane size.
3914 IRTemp p0 = newTempV128();
3915 IRTemp p1 = newTempV128();
3916 IRTemp p2 = newTempV128();
3917 IRTemp p3 = newTempV128();
3918 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3919 // And interleave (cat) at the 32 bit size.
3920 assign(*i0, CEV32x4(EX(p1), EX(p0)));
3921 assign(*i1, COD32x4(EX(p1), EX(p0)));
3922 assign(*i2, CEV32x4(EX(p3), EX(p2)));
3923 assign(*i3, COD32x4(EX(p3), EX(p2)));
3924 return;
3926 if (laneSzBlg2 == 1) {
3927 // 16x8
3928 // First, interleave at the 32-bit lane size.
3929 IRTemp p0 = newTempV128();
3930 IRTemp p1 = newTempV128();
3931 IRTemp p2 = newTempV128();
3932 IRTemp p3 = newTempV128();
3933 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3934 // And rearrange within each vector, to get the right 16 bit lanes.
3935 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3936 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3937 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3938 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3939 return;
3941 if (laneSzBlg2 == 0) {
3942 // 8x16
3943 // First, interleave at the 16-bit lane size.
3944 IRTemp p0 = newTempV128();
3945 IRTemp p1 = newTempV128();
3946 IRTemp p2 = newTempV128();
3947 IRTemp p3 = newTempV128();
3948 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3949 // And rearrange within each vector, to get the right 8 bit lanes.
3950 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3951 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3952 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3953 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3954 return;
3956 /*NOTREACHED*/
3957 vassert(0);
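/* Rough usage sketch (for illustration only -- the real caller is in
   the load/store decoder further down, and the temp names here are
   made up):

      IRTemp u0 = newTempV128(), u1 = newTempV128();
      IRTemp u2 = newTempV128(), u3 = newTempV128();
      // .. assign u0..u3 from the four source Q registers ..
      IRTemp i0 = newTempV128(), i1 = newTempV128();
      IRTemp i2 = newTempV128(), i3 = newTempV128();
      math_INTERLEAVE4_128(&i0, &i1, &i2, &i3, laneSzBlg2,
                           u0, u1, u2, u3);
      // .. then store i0..i3 to addr+0, +16, +32, +48 respectively

   for an ST4 of four Q registers. */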
3961 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3962 static
3963 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3964 UInt laneSzBlg2, IRTemp i0 )
3966 assign(*u0, mkexpr(i0));
3970 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3971 static
3972 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3973 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3975 /* This is pretty easy, since we have primitives directly to
3976 hand. */
3977 if (laneSzBlg2 == 3) {
3978 // 64x2
3979 // i1 == B1 A1, i0 == B0 A0
3980 // u1 == B1 B0, u0 == A1 A0
3981 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3982 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3983 return;
3985 if (laneSzBlg2 == 2) {
3986 // 32x4
3987 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3988 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3989 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3990 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3991 return;
3993 if (laneSzBlg2 == 1) {
3994 // 16x8
3995 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3996 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3997 // u1 == B{7..0}, u0 == A{7..0}
3998 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3999 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
4000 return;
4002 if (laneSzBlg2 == 0) {
4003 // 8x16
4004 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
4005 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
4006 // u1 == B{f..0}, u0 == A{f..0}
4007 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
4008 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
4009 return;
4011 /*NOTREACHED*/
4012 vassert(0);
4016 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
4017 static
4018 void math_DEINTERLEAVE3_128(
4019 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4020 UInt laneSzBlg2,
4021 IRTemp i0, IRTemp i1, IRTemp i2 )
4023 if (laneSzBlg2 == 3) {
4024 // 64x2
4025 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
4026 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
4027 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
4028 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
4029 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
4030 return;
4033 if (laneSzBlg2 == 2) {
4034 // 32x4
4035 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
4036 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
4037 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
4038 IRTemp t_a1c0b0a0 = newTempV128();
4039 IRTemp t_a2c1b1a1 = newTempV128();
4040 IRTemp t_a3c2b2a2 = newTempV128();
4041 IRTemp t_a0c3b3a3 = newTempV128();
4042 IRTemp p0 = newTempV128();
4043 IRTemp p1 = newTempV128();
4044 IRTemp p2 = newTempV128();
4045 // Compute some intermediate values.
4046 assign(t_a1c0b0a0, EX(i0));
4047 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
4048 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
4049 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
4050 // First deinterleave into lane-pairs
4051 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
4052 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
4053 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
4054 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
4055 // Then deinterleave at 64x2 granularity.
4056 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
4057 return;
4060 if (laneSzBlg2 == 1) {
4061 // 16x8
4062 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
4063 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
4064 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
4066 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
4067 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
4068 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
4070 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
4071 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
4072 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
4074 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
4075 s0 = s1 = s2 = s3
4076 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
4077 newTempsV128_4(&s0, &s1, &s2, &s3);
4078 newTempsV128_4(&t0, &t1, &t2, &t3);
4079 newTempsV128_4(&p0, &p1, &p2, &c00111111);
4081 // s0 == b2a2 c1b1a1 c0b0a0
4082 // s1 == b4a4 c3b3a3 c2b2a2
4083 // s2 == b6a6 c5b5a5 c4b4a4
4084 // s3 == b0a0 c7b7a7 c6b6a6
4085 assign(s0, EX(i0));
4086 assign(s1, SL(EX(i1),EX(i0),6*2));
4087 assign(s2, SL(EX(i2),EX(i1),4*2));
4088 assign(s3, SL(EX(i0),EX(i2),2*2));
4090 // t0 == 0 0 c1c0 b1b0 a1a0
4091 // t1 == 0 0 c3c2 b3b2 a3a2
4092 // t2 == 0 0 c5c4 b5b4 a5a4
4093 // t3 == 0 0 c7c6 b7b6 a7a6
4094 assign(c00111111, mkV128(0x0FFF));
4095 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4096 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4097 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4098 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4100 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4101 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4102 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4104 // Then deinterleave at 32x4 granularity.
4105 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4106 return;
4109 if (laneSzBlg2 == 0) {
4110 // 8x16. This is the same scheme as for 16x8, with twice the
4111 // number of intermediate values.
4113 // u2 == C{f..0}
4114 // u1 == B{f..0}
4115 // u0 == A{f..0}
4117 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4118 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4119 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4121 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4122 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4123 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4125 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4126 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4127 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4128 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4129 = IRTemp_INVALID;
4130 newTempsV128_4(&s0, &s1, &s2, &s3);
4131 newTempsV128_4(&s4, &s5, &s6, &s7);
4132 newTempsV128_4(&t0, &t1, &t2, &t3);
4133 newTempsV128_4(&t4, &t5, &t6, &t7);
4134 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4136 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4137 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4138 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4139 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4140 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4141 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4142 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4143 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4144 assign(s0, SL(EX(i1),EX(i0), 0));
4145 assign(s1, SL(EX(i1),EX(i0), 6));
4146 assign(s2, SL(EX(i1),EX(i0),12));
4147 assign(s3, SL(EX(i2),EX(i1), 2));
4148 assign(s4, SL(EX(i2),EX(i1), 8));
4149 assign(s5, SL(EX(i2),EX(i1),14));
4150 assign(s6, SL(EX(i0),EX(i2), 4));
4151 assign(s7, SL(EX(i0),EX(i2),10));
4153 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4154 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4155 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4156 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4157 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4158 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4159 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4160 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4161 assign(cMASK, mkV128(0x003F));
4162 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4163 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4164 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4165 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4166 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4167 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4168 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4169 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4171 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4172 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4173 SHL(EX(t3),2), SHR(EX(t2),4) ));
4174 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4176 // Then deinterleave at 16x8 granularity.
4177 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4178 return;
4181 /*NOTREACHED*/
4182 vassert(0);
4186 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4187 static
4188 void math_DEINTERLEAVE4_128(
4189 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4190 UInt laneSzBlg2,
4191 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4193 if (laneSzBlg2 == 3) {
4194 // 64x2
4195 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4196 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4197 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4198 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4199 return;
4201 if (laneSzBlg2 == 2) {
4202 // 32x4
4203 IRTemp p0 = newTempV128();
4204 IRTemp p2 = newTempV128();
4205 IRTemp p1 = newTempV128();
4206 IRTemp p3 = newTempV128();
4207 assign(p0, ILO32x4(EX(i1), EX(i0)));
4208 assign(p1, IHI32x4(EX(i1), EX(i0)));
4209 assign(p2, ILO32x4(EX(i3), EX(i2)));
4210 assign(p3, IHI32x4(EX(i3), EX(i2)));
4211 // And now do what we did for the 64-bit case.
4212 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4213 return;
4215 if (laneSzBlg2 == 1) {
4216 // 16x8
4217 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4218 IRTemp p0 = newTempV128();
4219 IRTemp p1 = newTempV128();
4220 IRTemp p2 = newTempV128();
4221 IRTemp p3 = newTempV128();
4222 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4223 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4224 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4225 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4226 // From here on is like the 32 bit case.
4227 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4228 return;
4230 if (laneSzBlg2 == 0) {
4231 // 8x16
4232 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4233 IRTemp p0 = newTempV128();
4234 IRTemp p1 = newTempV128();
4235 IRTemp p2 = newTempV128();
4236 IRTemp p3 = newTempV128();
4237 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4238 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4239 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4240 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4241 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4242 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4243 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4244 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4245 // From here on is like the 16 bit case.
4246 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4247 return;
4249 /*NOTREACHED*/
4250 vassert(0);
4254 /* Wrappers that use the full-width (de)interleavers to do half-width
4255 (de)interleaving. The scheme is to clone each input lane in the
4256 lower half of each incoming value, do a full width (de)interleave
4257 at the next lane size up, and remove every other lane of the
4258 result. The returned values may have any old junk in the upper
4259 64 bits -- the caller must ignore that. */
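/* Worked example (added for exposition): consider ST2 of two D
   registers with 16-bit lanes (laneSzBlg2 == 1), with inputs
   u0 == .. A3 A2 A1 A0 and u1 == .. B3 B2 B1 B0 in their low halves.
   The doubler (Iop_InterleaveLO16x8) clones each lane:
      du0 == A3 A3 A2 A2 A1 A1 A0 A0
      du1 == B3 B3 B2 B2 B1 B1 B0 B0
   A full-width interleave at the next lane size up (32x4) then moves
   each cloned pair around as a single unit, so the low half of the
   first result is
      di0 == .. B1 B1 A1 A1 B0 B0 A0 A0
   and the halver (Iop_CatEvenLanes16x8) drops one copy of each pair,
   leaving .. B1 A1 B0 A0 in the low 64 bits, with junk above, which
   the caller ignores. */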
4261 /* Helper function -- get doubling and narrowing operations. */
4262 static
4263 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4264 /*OUT*/IROp* halver,
4265 UInt laneSzBlg2 )
4267 switch (laneSzBlg2) {
4268 case 2:
4269 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4270 break;
4271 case 1:
4272 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4273 break;
4274 case 0:
4275 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4276 break;
4277 default:
4278 vassert(0);
4282 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4283 static
4284 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4285 UInt laneSzBlg2, IRTemp u0 )
4287 assign(*i0, mkexpr(u0));
4291 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4292 static
4293 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4294 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4296 if (laneSzBlg2 == 3) {
4297 // 1x64, degenerate case
4298 assign(*i0, EX(u0));
4299 assign(*i1, EX(u1));
4300 return;
4303 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4304 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4305 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4307 IRTemp du0 = newTempV128();
4308 IRTemp du1 = newTempV128();
4309 assign(du0, binop(doubler, EX(u0), EX(u0)));
4310 assign(du1, binop(doubler, EX(u1), EX(u1)));
4311 IRTemp di0 = newTempV128();
4312 IRTemp di1 = newTempV128();
4313 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4314 assign(*i0, binop(halver, EX(di0), EX(di0)));
4315 assign(*i1, binop(halver, EX(di1), EX(di1)));
4319 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4320 static
4321 void math_INTERLEAVE3_64(
4322 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4323 UInt laneSzBlg2,
4324 IRTemp u0, IRTemp u1, IRTemp u2 )
4326 if (laneSzBlg2 == 3) {
4327 // 1x64, degenerate case
4328 assign(*i0, EX(u0));
4329 assign(*i1, EX(u1));
4330 assign(*i2, EX(u2));
4331 return;
4334 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4335 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4336 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4338 IRTemp du0 = newTempV128();
4339 IRTemp du1 = newTempV128();
4340 IRTemp du2 = newTempV128();
4341 assign(du0, binop(doubler, EX(u0), EX(u0)));
4342 assign(du1, binop(doubler, EX(u1), EX(u1)));
4343 assign(du2, binop(doubler, EX(u2), EX(u2)));
4344 IRTemp di0 = newTempV128();
4345 IRTemp di1 = newTempV128();
4346 IRTemp di2 = newTempV128();
4347 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4348 assign(*i0, binop(halver, EX(di0), EX(di0)));
4349 assign(*i1, binop(halver, EX(di1), EX(di1)));
4350 assign(*i2, binop(halver, EX(di2), EX(di2)));
4354 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4355 static
4356 void math_INTERLEAVE4_64(
4357 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4358 UInt laneSzBlg2,
4359 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4361 if (laneSzBlg2 == 3) {
4362 // 1x64, degenerate case
4363 assign(*i0, EX(u0));
4364 assign(*i1, EX(u1));
4365 assign(*i2, EX(u2));
4366 assign(*i3, EX(u3));
4367 return;
4370 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4371 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4372 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4374 IRTemp du0 = newTempV128();
4375 IRTemp du1 = newTempV128();
4376 IRTemp du2 = newTempV128();
4377 IRTemp du3 = newTempV128();
4378 assign(du0, binop(doubler, EX(u0), EX(u0)));
4379 assign(du1, binop(doubler, EX(u1), EX(u1)));
4380 assign(du2, binop(doubler, EX(u2), EX(u2)));
4381 assign(du3, binop(doubler, EX(u3), EX(u3)));
4382 IRTemp di0 = newTempV128();
4383 IRTemp di1 = newTempV128();
4384 IRTemp di2 = newTempV128();
4385 IRTemp di3 = newTempV128();
4386 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4387 laneSzBlg2 + 1, du0, du1, du2, du3);
4388 assign(*i0, binop(halver, EX(di0), EX(di0)));
4389 assign(*i1, binop(halver, EX(di1), EX(di1)));
4390 assign(*i2, binop(halver, EX(di2), EX(di2)));
4391 assign(*i3, binop(halver, EX(di3), EX(di3)));
4395 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4396 static
4397 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4398 UInt laneSzBlg2, IRTemp i0 )
4400 assign(*u0, mkexpr(i0));
4404 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4405 static
4406 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4407 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4409 if (laneSzBlg2 == 3) {
4410 // 1x64, degenerate case
4411 assign(*u0, EX(i0));
4412 assign(*u1, EX(i1));
4413 return;
4416 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4417 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4418 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4420 IRTemp di0 = newTempV128();
4421 IRTemp di1 = newTempV128();
4422 assign(di0, binop(doubler, EX(i0), EX(i0)));
4423 assign(di1, binop(doubler, EX(i1), EX(i1)));
4425 IRTemp du0 = newTempV128();
4426 IRTemp du1 = newTempV128();
4427 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4428 assign(*u0, binop(halver, EX(du0), EX(du0)));
4429 assign(*u1, binop(halver, EX(du1), EX(du1)));
4433 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4434 static
4435 void math_DEINTERLEAVE3_64(
4436 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4437 UInt laneSzBlg2,
4438 IRTemp i0, IRTemp i1, IRTemp i2 )
4440 if (laneSzBlg2 == 3) {
4441 // 1x64, degenerate case
4442 assign(*u0, EX(i0));
4443 assign(*u1, EX(i1));
4444 assign(*u2, EX(i2));
4445 return;
4448 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4449 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4450 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4452 IRTemp di0 = newTempV128();
4453 IRTemp di1 = newTempV128();
4454 IRTemp di2 = newTempV128();
4455 assign(di0, binop(doubler, EX(i0), EX(i0)));
4456 assign(di1, binop(doubler, EX(i1), EX(i1)));
4457 assign(di2, binop(doubler, EX(i2), EX(i2)));
4458 IRTemp du0 = newTempV128();
4459 IRTemp du1 = newTempV128();
4460 IRTemp du2 = newTempV128();
4461 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4462 assign(*u0, binop(halver, EX(du0), EX(du0)));
4463 assign(*u1, binop(halver, EX(du1), EX(du1)));
4464 assign(*u2, binop(halver, EX(du2), EX(du2)));
4468 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4469 static
4470 void math_DEINTERLEAVE4_64(
4471 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4472 UInt laneSzBlg2,
4473 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4475 if (laneSzBlg2 == 3) {
4476 // 1x64, degenerate case
4477 assign(*u0, EX(i0));
4478 assign(*u1, EX(i1));
4479 assign(*u2, EX(i2));
4480 assign(*u3, EX(i3));
4481 return;
4484 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4485 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4486 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4488 IRTemp di0 = newTempV128();
4489 IRTemp di1 = newTempV128();
4490 IRTemp di2 = newTempV128();
4491 IRTemp di3 = newTempV128();
4492 assign(di0, binop(doubler, EX(i0), EX(i0)));
4493 assign(di1, binop(doubler, EX(i1), EX(i1)));
4494 assign(di2, binop(doubler, EX(i2), EX(i2)));
4495 assign(di3, binop(doubler, EX(i3), EX(i3)));
4496 IRTemp du0 = newTempV128();
4497 IRTemp du1 = newTempV128();
4498 IRTemp du2 = newTempV128();
4499 IRTemp du3 = newTempV128();
4500 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4501 laneSzBlg2 + 1, di0, di1, di2, di3);
4502 assign(*u0, binop(halver, EX(du0), EX(du0)));
4503 assign(*u1, binop(halver, EX(du1), EX(du1)));
4504 assign(*u2, binop(halver, EX(du2), EX(du2)));
4505 assign(*u3, binop(halver, EX(du3), EX(du3)));
4509 #undef EX
4510 #undef SL
4511 #undef ROR
4512 #undef ROL
4513 #undef SHR
4514 #undef SHL
4515 #undef ILO64x2
4516 #undef IHI64x2
4517 #undef ILO32x4
4518 #undef IHI32x4
4519 #undef ILO16x8
4520 #undef IHI16x8
4521 #undef ILO8x16
4522 #undef IHI8x16
4523 #undef CEV32x4
4524 #undef COD32x4
4525 #undef COD16x8
4526 #undef COD8x16
4527 #undef CEV8x16
4528 #undef AND
4529 #undef OR2
4530 #undef OR3
4531 #undef OR4
4534 /*------------------------------------------------------------*/
4535 /*--- Load and Store instructions ---*/
4536 /*------------------------------------------------------------*/
4538 /* Generate the EA for a "reg + reg" style amode. This is done from
4539 parts of the insn, but for sanity checking's sake it takes the whole
4540 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4541 and S=insn[12]:
4543 The possible forms, along with their opt:S values, are:
4544 011:0 Xn|SP + Xm
4545 111:0 Xn|SP + Xm
4546 011:1 Xn|SP + Xm * transfer_szB
4547 111:1 Xn|SP + Xm * transfer_szB
4548 010:0 Xn|SP + 32Uto64(Wm)
4549 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4550 110:0 Xn|SP + 32Sto64(Wm)
4551 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4553 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4554 the transfer size is insn[23,31,30]. For integer loads/stores,
4555 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4557 If the decoding fails, it returns IRTemp_INVALID.
4559 isInt is True iff this decoding is for transfers to/from integer
4560 registers. If False it is for transfers to/from vector registers.
4562 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4564 UInt optS = SLICE_UInt(insn, 15, 12);
4565 UInt mm = SLICE_UInt(insn, 20, 16);
4566 UInt nn = SLICE_UInt(insn, 9, 5);
4567 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4568 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4570 buf[0] = 0;
4572 /* Sanity checks, that this really is a load/store insn. */
4573 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4574 goto fail;
4576 if (isInt
4577 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4578 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4579 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4580 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4581 goto fail;
4583 if (!isInt
4584 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4585 goto fail;
4587 /* Throw out non-verified but possibly valid cases. */
4588 switch (szLg2) {
4589 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4590 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4591 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4592 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4593 case BITS3(1,0,0): // can only ever be valid for the vector case
4594 if (isInt) goto fail; else break;
4595 case BITS3(1,0,1): // these sizes are never valid
4596 case BITS3(1,1,0):
4597 case BITS3(1,1,1): goto fail;
4599 default: vassert(0);
4602 IRExpr* rhs = NULL;
4603 switch (optS) {
4604 case BITS4(1,1,1,0): goto fail; //ATC
4605 case BITS4(0,1,1,0):
4606 rhs = getIReg64orZR(mm);
4607 vex_sprintf(buf, "[%s, %s]",
4608 nameIReg64orZR(nn), nameIReg64orZR(mm));
4609 break;
4610 case BITS4(1,1,1,1): goto fail; //ATC
4611 case BITS4(0,1,1,1):
4612 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4613 vex_sprintf(buf, "[%s, %s lsl %u]",
4614 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4615 break;
4616 case BITS4(0,1,0,0):
4617 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4618 vex_sprintf(buf, "[%s, %s uxtx]",
4619 nameIReg64orZR(nn), nameIReg32orZR(mm));
4620 break;
4621 case BITS4(0,1,0,1):
4622 rhs = binop(Iop_Shl64,
4623 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4624 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4625 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4626 break;
4627 case BITS4(1,1,0,0):
4628 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4629 vex_sprintf(buf, "[%s, %s sxtx]",
4630 nameIReg64orZR(nn), nameIReg32orZR(mm));
4631 break;
4632 case BITS4(1,1,0,1):
4633 rhs = binop(Iop_Shl64,
4634 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4635 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4636 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4637 break;
4638 default:
4639 /* The rest appear to be genuinely invalid */
4640 goto fail;
4643 vassert(rhs);
4644 IRTemp res = newTemp(Ity_I64);
4645 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4646 return res;
4648 fail:
4649 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4650 return IRTemp_INVALID;
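/* Worked example (added for exposition): a 64-bit integer
   LDR Xt, [Xn, Xm, lsl #3] arrives here with optS == 0111 and
   szLg2 == 3, so the BITS4(0,1,1,1) case computes
      EA = Xn|SP + (Xm << 3)
   The uxtx/sxtx cases do the same, except that Wm is first zero- or
   sign-extended to 64 bits. */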
4654 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4655 bits of DATAE :: Ity_I64. */
4656 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4658 IRExpr* addrE = mkexpr(addr);
4659 switch (szB) {
4660 case 8:
4661 storeLE(addrE, dataE);
4662 break;
4663 case 4:
4664 storeLE(addrE, unop(Iop_64to32, dataE));
4665 break;
4666 case 2:
4667 storeLE(addrE, unop(Iop_64to16, dataE));
4668 break;
4669 case 1:
4670 storeLE(addrE, unop(Iop_64to8, dataE));
4671 break;
4672 default:
4673 vassert(0);
4678 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4679 placing the result in an Ity_I64 temporary. */
4680 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4682 IRTemp res = newTemp(Ity_I64);
4683 IRExpr* addrE = mkexpr(addr);
4684 switch (szB) {
4685 case 8:
4686 assign(res, loadLE(Ity_I64,addrE));
4687 break;
4688 case 4:
4689 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4690 break;
4691 case 2:
4692 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4693 break;
4694 case 1:
4695 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4696 break;
4697 default:
4698 vassert(0);
4700 return res;
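/* Usage sketch (for illustration; this mirrors how the immediate-
   offset LDR/STR case below uses these two helpers):

      IRTemp ta = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (isLD)
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      else
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));

   so a 2-byte store takes the low 16 bits of Xt|XZR, and a 2-byte
   load zero-extends into the full 64-bit register. */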
4704 /* Generate a "standard 7" name, from bitQ and size. But also
4705 allow ".1d" since that's occasionally useful. */
4706 static
4707 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4709 vassert(bitQ <= 1 && size <= 3);
4710 const HChar* nms[8]
4711 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4712 UInt ix = (bitQ << 2) | size;
4713 vassert(ix < 8);
4714 return nms[ix];
4718 static
4719 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
4720 const VexAbiInfo* abiinfo
4723 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4725 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4726 /* uimm12 is scaled by the transfer size
4728 31 29 26 21 9 4
4729 | | | | | |
4730 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4731 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4733 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4734 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4736 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4737 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4739 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4740 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4742 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4743 UInt szLg2 = INSN(31,30);
4744 UInt szB = 1 << szLg2;
4745 Bool isLD = INSN(22,22) == 1;
4746 UInt offs = INSN(21,10) * szB;
4747 UInt nn = INSN(9,5);
4748 UInt tt = INSN(4,0);
4749 IRTemp ta = newTemp(Ity_I64);
4750 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4751 if (nn == 31) { /* FIXME generate stack alignment check */ }
4752 vassert(szLg2 < 4);
4753 if (isLD) {
4754 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4755 } else {
4756 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4758 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4759 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4760 DIP("%s %s, [%s, #%u]\n",
4761 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4762 nameIReg64orSP(nn), offs);
4763 return True;
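/* For example (added for exposition), with szLg2 == 3 (an X-register
   transfer) an imm12 field of 2 gives offs == 16, so the insn is
   shown as "ldr x<t>, [x<n>, #16]" / "str x<t>, [x<n>, #16]"; the
   unsigned immediate always scales by the transfer size. */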
4766 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4768 31 29 26 20 11 9 4
4769 | | | | | | |
4770 (at-Rn-then-Rn=EA) | | |
4771 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4772 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4774 (at-EA-then-Rn=EA)
4775 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4776 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4778 (at-EA)
4779 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4780 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4782 simm9 is unscaled.
4784 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4785 load case this is because it would create two competing values for
4786 Rt. In the store case the reason is unclear, but the spec
4787 disallows it anyway.
4789 Stores are narrowing, loads are unsigned widening. sz encodes
4790 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4792 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4793 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4794 UInt szLg2 = INSN(31,30);
4795 UInt szB = 1 << szLg2;
4796 Bool isLoad = INSN(22,22) == 1;
4797 UInt imm9 = INSN(20,12);
4798 UInt nn = INSN(9,5);
4799 UInt tt = INSN(4,0);
4800 Bool wBack = INSN(10,10) == 1;
4801 UInt how = INSN(11,10);
4802 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4803 /* undecodable; fall through */
4804 } else {
4805 if (nn == 31) { /* FIXME generate stack alignment check */ }
4807 // Compute the transfer address TA and the writeback address WA.
4808 IRTemp tRN = newTemp(Ity_I64);
4809 assign(tRN, getIReg64orSP(nn));
4810 IRTemp tEA = newTemp(Ity_I64);
4811 Long simm9 = (Long)sx_to_64(imm9, 9);
4812 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4814 IRTemp tTA = newTemp(Ity_I64);
4815 IRTemp tWA = newTemp(Ity_I64);
4816 switch (how) {
4817 case BITS2(0,1):
4818 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4819 case BITS2(1,1):
4820 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4821 case BITS2(0,0):
4822 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4823 default:
4824 vassert(0); /* NOTREACHED */
4827 /* Normally rN would be updated after the transfer. However, in
4828 the special cases typified by
4829 str x30, [sp,#-16]!
4830 str w1, [sp,#-32]!
4831 it is necessary to update SP before the transfer, (1)
4832 because Memcheck will otherwise complain about a write
4833 below the stack pointer, and (2) because the segfault
4834 stack extension mechanism will otherwise extend the stack
4835 only down to SP before the instruction, which might not be
4836 far enough, if the -16/-32 offset takes the actual access
4837 address to the next page.
4839 Bool earlyWBack
4840 = wBack && simm9 < 0 && (szB == 8 || szB == 4)
4841 && how == BITS2(1,1) && nn == 31 && !isLoad;
4843 if (wBack && earlyWBack)
4844 putIReg64orSP(nn, mkexpr(tEA));
4846 if (isLoad) {
4847 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4848 } else {
4849 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4852 if (wBack && !earlyWBack)
4853 putIReg64orSP(nn, mkexpr(tEA));
4855 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4856 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4857 const HChar* fmt_str = NULL;
4858 switch (how) {
4859 case BITS2(0,1):
4860 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4861 break;
4862 case BITS2(1,1):
4863 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4864 break;
4865 case BITS2(0,0):
4866 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4867 break;
4868 default:
4869 vassert(0);
4871 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4872 nameIRegOrZR(szB == 8, tt),
4873 nameIReg64orSP(nn), simm9);
4874 return True;
4878 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4879 /* L==1 => mm==LD
4880 L==0 => mm==ST
4881 x==0 => 32 bit transfers, and zero extended loads
4882 x==1 => 64 bit transfers
4883 simm7 is scaled by the (single-register) transfer size
4885 (at-Rn-then-Rn=EA)
4886 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4888 (at-EA-then-Rn=EA)
4889 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4891 (at-EA)
4892 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4894 UInt insn_30_23 = INSN(30,23);
4895 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4896 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4897 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4898 UInt bL = INSN(22,22);
4899 UInt bX = INSN(31,31);
4900 UInt bWBack = INSN(23,23);
4901 UInt rT1 = INSN(4,0);
4902 UInt rN = INSN(9,5);
4903 UInt rT2 = INSN(14,10);
4904 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4905 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4906 || (bL && rT1 == rT2)) {
4907 /* undecodable; fall through */
4908 } else {
4909 if (rN == 31) { /* FIXME generate stack alignment check */ }
4911 // Compute the transfer address TA and the writeback address WA.
4912 IRTemp tRN = newTemp(Ity_I64);
4913 assign(tRN, getIReg64orSP(rN));
4914 IRTemp tEA = newTemp(Ity_I64);
4915 simm7 = (bX ? 8 : 4) * simm7;
4916 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4918 IRTemp tTA = newTemp(Ity_I64);
4919 IRTemp tWA = newTemp(Ity_I64);
4920 switch (INSN(24,23)) {
4921 case BITS2(0,1):
4922 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4923 case BITS2(1,1):
4924 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4925 case BITS2(1,0):
4926 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4927 default:
4928 vassert(0); /* NOTREACHED */
4931 /* Normally rN would be updated after the transfer. However, in
4932 the special case typified by
4933 stp x29, x30, [sp,#-112]!
4934 it is necessary to update SP before the transfer, (1)
4935 because Memcheck will otherwise complain about a write
4936 below the stack pointer, and (2) because the segfault
4937 stack extension mechanism will otherwise extend the stack
4938 only down to SP before the instruction, which might not be
4939 far enough, if the -112 offset takes the actual access
4940 address to the next page.
4942 Bool earlyWBack
4943 = bWBack && simm7 < 0
4944 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4946 if (bWBack && earlyWBack)
4947 putIReg64orSP(rN, mkexpr(tEA));
4949 /**/ if (bL == 1 && bX == 1) {
4950 // 64 bit load
4951 putIReg64orZR(rT1, loadLE(Ity_I64,
4952 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4953 putIReg64orZR(rT2, loadLE(Ity_I64,
4954 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4955 } else if (bL == 1 && bX == 0) {
4956 // 32 bit load
4957 putIReg32orZR(rT1, loadLE(Ity_I32,
4958 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4959 putIReg32orZR(rT2, loadLE(Ity_I32,
4960 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4961 } else if (bL == 0 && bX == 1) {
4962 // 64 bit store
4963 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4964 getIReg64orZR(rT1));
4965 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4966 getIReg64orZR(rT2));
4967 } else {
4968 vassert(bL == 0 && bX == 0);
4969 // 32 bit store
4970 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4971 getIReg32orZR(rT1));
4972 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4973 getIReg32orZR(rT2));
4976 if (bWBack && !earlyWBack)
4977 putIReg64orSP(rN, mkexpr(tEA));
4979 const HChar* fmt_str = NULL;
4980 switch (INSN(24,23)) {
4981 case BITS2(0,1):
4982 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4983 break;
4984 case BITS2(1,1):
4985 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4986 break;
4987 case BITS2(1,0):
4988 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4989 break;
4990 default:
4991 vassert(0);
4993 DIP(fmt_str, bL == 0 ? "st" : "ld",
4994 nameIRegOrZR(bX == 1, rT1),
4995 nameIRegOrZR(bX == 1, rT2),
4996 nameIReg64orSP(rN), simm7);
4997 return True;
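/* For example (added for exposition), the pre-index store
      stp x29, x30, [sp, #-112]!
   quoted in the comment above has bX == 1; its imm7 field
   sign-extends to -14 and is then scaled by 8, giving simm7 == -112.
   Because it is a store with a negative offset, pre-indexed, and
   based on SP, it also takes the early-writeback path described
   above. */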
5001 /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
5002 /* Does 32 bit transfers which are sign extended to 64 bits.
5003 simm7 is scaled by the (single-register) transfer size
5005 (at-Rn-then-Rn=EA)
5006 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
5008 (at-EA-then-Rn=EA)
5009 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
5011 (at-EA)
5012 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
5014 UInt insn_31_22 = INSN(31,22);
5015 if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
5016 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5017 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5018 UInt bWBack = INSN(23,23);
5019 UInt rT1 = INSN(4,0);
5020 UInt rN = INSN(9,5);
5021 UInt rT2 = INSN(14,10);
5022 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5023 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5024 || (rT1 == rT2)) {
5025 /* undecodable; fall through */
5026 } else {
5027 if (rN == 31) { /* FIXME generate stack alignment check */ }
5029 // Compute the transfer address TA and the writeback address WA.
5030 IRTemp tRN = newTemp(Ity_I64);
5031 assign(tRN, getIReg64orSP(rN));
5032 IRTemp tEA = newTemp(Ity_I64);
5033 simm7 = 4 * simm7;
5034 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5036 IRTemp tTA = newTemp(Ity_I64);
5037 IRTemp tWA = newTemp(Ity_I64);
5038 switch (INSN(24,23)) {
5039 case BITS2(0,1):
5040 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5041 case BITS2(1,1):
5042 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5043 case BITS2(1,0):
5044 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5045 default:
5046 vassert(0); /* NOTREACHED */
5049 // 32 bit load, sign extended to 64 bits
5050 putIReg64orZR(rT1, unop(Iop_32Sto64,
5051 loadLE(Ity_I32, binop(Iop_Add64,
5052 mkexpr(tTA),
5053 mkU64(0)))));
5054 putIReg64orZR(rT2, unop(Iop_32Sto64,
5055 loadLE(Ity_I32, binop(Iop_Add64,
5056 mkexpr(tTA),
5057 mkU64(4)))));
5058 if (bWBack)
5059 putIReg64orSP(rN, mkexpr(tEA));
5061 const HChar* fmt_str = NULL;
5062 switch (INSN(24,23)) {
5063 case BITS2(0,1):
5064 fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5065 break;
5066 case BITS2(1,1):
5067 fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5068 break;
5069 case BITS2(1,0):
5070 fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
5071 break;
5072 default:
5073 vassert(0);
5075 DIP(fmt_str, nameIReg64orZR(rT1),
5076 nameIReg64orZR(rT2),
5077 nameIReg64orSP(rN), simm7);
5078 return True;
5082 /* ---------------- LDR (literal, int reg) ---------------- */
5083 /* 31 29 23 4
5084 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5085 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5086 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5087 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5088 Just handles the first two cases for now.
5090 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5091 UInt imm19 = INSN(23,5);
5092 UInt rT = INSN(4,0);
5093 UInt bX = INSN(30,30);
5094 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5095 if (bX) {
5096 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5097 } else {
5098 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5100 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5101 return True;
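/* Note (added for exposition): imm19 is a word offset, so the
   effective address is PC + sign-extend(imm19 << 2), where the sign
   extension is from 21 bits; e.g. imm19 == 0x7FFFF (all ones) gives
   an offset of -4 bytes. */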
5104 /* -------------- {LD,ST}R (integer register) --------------- */
5105 /* 31 29 20 15 12 11 9 4
5106 | | | | | | | |
5107 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5108 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5109 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5110 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5112 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5113 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5114 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5115 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5117 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5118 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5119 HChar dis_buf[64];
5120 UInt szLg2 = INSN(31,30);
5121 Bool isLD = INSN(22,22) == 1;
5122 UInt tt = INSN(4,0);
5123 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5124 if (ea != IRTemp_INVALID) {
5125 switch (szLg2) {
5126 case 3: /* 64 bit */
5127 if (isLD) {
5128 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5129 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5130 } else {
5131 storeLE(mkexpr(ea), getIReg64orZR(tt));
5132 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5134 break;
5135 case 2: /* 32 bit */
5136 if (isLD) {
5137 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5138 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5139 } else {
5140 storeLE(mkexpr(ea), getIReg32orZR(tt));
5141 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5143 break;
5144 case 1: /* 16 bit */
5145 if (isLD) {
5146 putIReg64orZR(tt, unop(Iop_16Uto64,
5147 loadLE(Ity_I16, mkexpr(ea))));
5148 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5149 } else {
5150 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5151 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5153 break;
5154 case 0: /* 8 bit */
5155 if (isLD) {
5156 putIReg64orZR(tt, unop(Iop_8Uto64,
5157 loadLE(Ity_I8, mkexpr(ea))));
5158 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5159 } else {
5160 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5161 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5163 break;
5164 default:
5165 vassert(0);
5167 return True;
5171 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5172 /* 31 29 26 23 21 9 4
5173 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5174 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5175 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5176 where
5177 Rt is Wt when x==1, Xt when x==0
5179 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5180 /* Further checks on bits 31:30 and 22 */
5181 Bool valid = False;
5182 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5183 case BITS3(1,0,0):
5184 case BITS3(0,1,0): case BITS3(0,1,1):
5185 case BITS3(0,0,0): case BITS3(0,0,1):
5186 valid = True;
5187 break;
5189 if (valid) {
5190 UInt szLg2 = INSN(31,30);
5191 UInt bitX = INSN(22,22);
5192 UInt imm12 = INSN(21,10);
5193 UInt nn = INSN(9,5);
5194 UInt tt = INSN(4,0);
5195 UInt szB = 1 << szLg2;
5196 IRExpr* ea = binop(Iop_Add64,
5197 getIReg64orSP(nn), mkU64(imm12 * szB));
5198 switch (szB) {
5199 case 4:
5200 vassert(bitX == 0);
5201 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5202 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5203 nameIReg64orSP(nn), imm12 * szB);
5204 break;
5205 case 2:
5206 if (bitX == 1) {
5207 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5208 } else {
5209 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5211 DIP("ldrsh %s, [%s, #%u]\n",
5212 nameIRegOrZR(bitX == 0, tt),
5213 nameIReg64orSP(nn), imm12 * szB);
5214 break;
5215 case 1:
5216 if (bitX == 1) {
5217 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5218 } else {
5219 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5221 DIP("ldrsb %s, [%s, #%u]\n",
5222 nameIRegOrZR(bitX == 0, tt),
5223 nameIReg64orSP(nn), imm12 * szB);
5224 break;
5225 default:
5226 vassert(0);
5228 return True;
5230 /* else fall through */
5233 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5234 /* (at-Rn-then-Rn=EA)
5235 31 29 23 21 20 11 9 4
5236 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5237 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5238 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5240 (at-EA-then-Rn=EA)
5241 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5242 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5243 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5244 where
5245 Rt is Wt when x==1, Xt when x==0
5246 transfer-at-Rn when [11]==0, at EA when [11]==1
5248 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5249 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5250 /* Further checks on bits 31:30 and 22 */
5251 Bool valid = False;
5252 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5253 case BITS3(1,0,0): // LDRSW Xt
5254 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5255 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5256 valid = True;
5257 break;
5259 if (valid) {
5260 UInt szLg2 = INSN(31,30);
5261 UInt imm9 = INSN(20,12);
5262 Bool atRN = INSN(11,11) == 0;
5263 UInt nn = INSN(9,5);
5264 UInt tt = INSN(4,0);
5265 IRTemp tRN = newTemp(Ity_I64);
5266 IRTemp tEA = newTemp(Ity_I64);
5267 IRTemp tTA = IRTemp_INVALID;
5268 ULong simm9 = sx_to_64(imm9, 9);
5269 Bool is64 = INSN(22,22) == 0;
5270 assign(tRN, getIReg64orSP(nn));
5271 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5272 tTA = atRN ? tRN : tEA;
5273 HChar ch = '?';
5274 /* There are 5 cases:
5275 byte load, SX to 64
5276 byte load, SX to 32, ZX to 64
5277 halfword load, SX to 64
5278 halfword load, SX to 32, ZX to 64
5279 word load, SX to 64
5280 The ifs below handle them in the listed order.
5282 if (szLg2 == 0) {
5283 ch = 'b';
5284 if (is64) {
5285 putIReg64orZR(tt, unop(Iop_8Sto64,
5286 loadLE(Ity_I8, mkexpr(tTA))));
5287 } else {
5288 putIReg32orZR(tt, unop(Iop_8Sto32,
5289 loadLE(Ity_I8, mkexpr(tTA))));
5292 else if (szLg2 == 1) {
5293 ch = 'h';
5294 if (is64) {
5295 putIReg64orZR(tt, unop(Iop_16Sto64,
5296 loadLE(Ity_I16, mkexpr(tTA))));
5297 } else {
5298 putIReg32orZR(tt, unop(Iop_16Sto32,
5299 loadLE(Ity_I16, mkexpr(tTA))));
5302 else if (szLg2 == 2 && is64) {
5303 ch = 'w';
5304 putIReg64orZR(tt, unop(Iop_32Sto64,
5305 loadLE(Ity_I32, mkexpr(tTA))));
5307 else {
5308 vassert(0);
5310 putIReg64orSP(nn, mkexpr(tEA));
5311 DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!\n",
5312 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5313 return True;
5315 /* else fall through */
5318 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5319 /* 31 29 23 21 20 11 9 4
5320 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5321 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5322 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5323 where
5324 Rt is Wt when x==1, Xt when x==0
5326 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5327 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5328 /* Further checks on bits 31:30 and 22 */
5329 Bool valid = False;
5330 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5331 case BITS3(1,0,0): // LDURSW Xt
5332 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5333 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5334 valid = True;
5335 break;
5337 if (valid) {
5338 UInt szLg2 = INSN(31,30);
5339 UInt imm9 = INSN(20,12);
5340 UInt nn = INSN(9,5);
5341 UInt tt = INSN(4,0);
5342 IRTemp tRN = newTemp(Ity_I64);
5343 IRTemp tEA = newTemp(Ity_I64);
5344 ULong simm9 = sx_to_64(imm9, 9);
5345 Bool is64 = INSN(22,22) == 0;
5346 assign(tRN, getIReg64orSP(nn));
5347 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5348 HChar ch = '?';
5349 /* There are 5 cases:
5350 byte load, SX to 64
5351 byte load, SX to 32, ZX to 64
5352 halfword load, SX to 64
5353 halfword load, SX to 32, ZX to 64
5354 word load, SX to 64
5355 The ifs below handle them in the listed order.
5357 if (szLg2 == 0) {
5358 ch = 'b';
5359 if (is64) {
5360 putIReg64orZR(tt, unop(Iop_8Sto64,
5361 loadLE(Ity_I8, mkexpr(tEA))));
5362 } else {
5363 putIReg32orZR(tt, unop(Iop_8Sto32,
5364 loadLE(Ity_I8, mkexpr(tEA))));
5367 else if (szLg2 == 1) {
5368 ch = 'h';
5369 if (is64) {
5370 putIReg64orZR(tt, unop(Iop_16Sto64,
5371 loadLE(Ity_I16, mkexpr(tEA))));
5372 } else {
5373 putIReg32orZR(tt, unop(Iop_16Sto32,
5374 loadLE(Ity_I16, mkexpr(tEA))));
5377 else if (szLg2 == 2 && is64) {
5378 ch = 'w';
5379 putIReg64orZR(tt, unop(Iop_32Sto64,
5380 loadLE(Ity_I32, mkexpr(tEA))));
5382 else {
5383 vassert(0);
5385 DIP("ldurs%c %s, [%s, #%lld]",
5386 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5387 return True;
5389 /* else fall through */
5392 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5393 /* L==1 => mm==LD
5394 L==0 => mm==ST
5395 sz==00 => 32 bit (S) transfers
5396 sz==01 => 64 bit (D) transfers
5397 sz==10 => 128 bit (Q) transfers
5398 sz==11 isn't allowed
5399 simm7 is scaled by the (single-register) transfer size
5401 31 29 26 22 21 14 9 4
5403 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5404 (at-EA, with nontemporal hint)
5406 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5407 (at-Rn-then-Rn=EA)
5409 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5410 (at-EA)
5412 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5413 (at-EA-then-Rn=EA)
5415 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5416 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5417 Bool isLD = INSN(22,22) == 1;
5418 Bool wBack = INSN(23,23) == 1;
5419 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5420 UInt tt2 = INSN(14,10);
5421 UInt nn = INSN(9,5);
5422 UInt tt1 = INSN(4,0);
5423 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5424 /* undecodable; fall through */
5425 } else {
5426 if (nn == 31) { /* FIXME generate stack alignment check */ }
5428 // Compute the transfer address TA and the writeback address WA.
5429 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5430 IRTemp tRN = newTemp(Ity_I64);
5431 assign(tRN, getIReg64orSP(nn));
5432 IRTemp tEA = newTemp(Ity_I64);
5433 simm7 = szB * simm7;
5434 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5436 IRTemp tTA = newTemp(Ity_I64);
5437 IRTemp tWA = newTemp(Ity_I64);
5438 switch (INSN(24,23)) {
5439 case BITS2(0,1):
5440 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5441 case BITS2(1,1):
5442 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5443 case BITS2(1,0):
5444 case BITS2(0,0):
5445 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5446 default:
5447 vassert(0); /* NOTREACHED */
5450 IRType ty = Ity_INVALID;
5451 switch (szB) {
5452 case 4: ty = Ity_F32; break;
5453 case 8: ty = Ity_F64; break;
5454 case 16: ty = Ity_V128; break;
5455 default: vassert(0);
5458 /* Normally rN would be updated after the transfer. However, in
5459 the special cases typified by
5460 stp q0, q1, [sp,#-512]!
5461 stp d0, d1, [sp,#-512]!
5462 stp s0, s1, [sp,#-512]!
5463 it is necessary to update SP before the transfer, (1)
5464 because Memcheck will otherwise complain about a write
5465 below the stack pointer, and (2) because the segfault
5466 stack extension mechanism will otherwise extend the stack
5467 only down to SP before the instruction, which might not be
5468 far enough, if the -512 offset takes the actual access
5469 address to the next page.
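// Worked example (illustrative only): for "stp q0, q1, [sp, #-512]!"
// with SP initially 0x4000, the two stores go to 0x3E00 and 0x3E10,
// which may lie on a page below the one SP currently points into.
// Writing SP = 0x3E00 before emitting the stores keeps both Memcheck
// and the stack-extension logic in step with the access addresses.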
5471 Bool earlyWBack
5472 = wBack && simm7 < 0
5473 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5475 if (wBack && earlyWBack)
5476 putIReg64orSP(nn, mkexpr(tEA));
5478 if (isLD) {
5479 if (szB < 16) {
5480 putQReg128(tt1, mkV128(0x0000));
5482 putQRegLO(tt1,
5483 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5484 if (szB < 16) {
5485 putQReg128(tt2, mkV128(0x0000));
5487 putQRegLO(tt2,
5488 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5489 } else {
5490 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5491 getQRegLO(tt1, ty));
5492 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5493 getQRegLO(tt2, ty));
5496 if (wBack && !earlyWBack)
5497 putIReg64orSP(nn, mkexpr(tEA));
5499 const HChar* fmt_str = NULL;
5500 switch (INSN(24,23)) {
5501 case BITS2(0,1):
5502 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5503 break;
5504 case BITS2(1,1):
5505 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5506 break;
5507 case BITS2(1,0):
5508 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5509 break;
5510 case BITS2(0,0):
5511 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5512 break;
5513 default:
5514 vassert(0);
5516 DIP(fmt_str, isLD ? "ld" : "st",
5517 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5518 nameIReg64orSP(nn), simm7);
5519 return True;
5523 /* -------------- {LD,ST}R (vector register) --------------- */
5524 /* 31 29 23 20 15 12 11 9 4
5525 | | | | | | | | |
5526 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5527 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5528 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5529 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5530 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5532 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5533 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5534 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5535 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5536 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5538 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5539 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5540 HChar dis_buf[64];
5541 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5542 Bool isLD = INSN(22,22) == 1;
5543 UInt tt = INSN(4,0);
5544 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5545 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5546 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5547 switch (szLg2) {
5548 case 0: /* 8 bit */
5549 if (isLD) {
5550 putQReg128(tt, mkV128(0x0000));
5551 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5552 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5553 } else {
5554 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5555 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5557 break;
5558 case 1:
5559 if (isLD) {
5560 putQReg128(tt, mkV128(0x0000));
5561 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5562 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5563 } else {
5564 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5565 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5567 break;
5568 case 2: /* 32 bit */
5569 if (isLD) {
5570 putQReg128(tt, mkV128(0x0000));
5571 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5572 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5573 } else {
5574 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5575 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5577 break;
5578 case 3: /* 64 bit */
5579 if (isLD) {
5580 putQReg128(tt, mkV128(0x0000));
5581 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5582 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5583 } else {
5584 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5585 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5587 break;
5588 case 4:
5589 if (isLD) {
5590 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5591 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5592 } else {
5593 storeLE(mkexpr(ea), getQReg128(tt));
5594 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5596 break;
5597 default:
5598 vassert(0);
5600 return True;
5602 after_LDR_STR_vector_register:
5604 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5605 /* 31 29 22 20 15 12 11 9 4
5606 | | | | | | | | |
5607 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5609 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5610 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5612 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5613 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5615 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5616 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5617 HChar dis_buf[64];
5618 UInt szLg2 = INSN(31,30);
5619 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5620 UInt tt = INSN(4,0);
5621 if (szLg2 == 3) goto after_LDRS_integer_register;
5622 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5623 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5624 /* Enumerate the 5 variants explicitly. */
5625 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5626 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5627 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5628 return True;
5630 else
5631 if (szLg2 == 1/*16 bit*/) {
5632 if (sxTo64) {
5633 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5634 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5635 } else {
5636 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5637 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5639 return True;
5641 else
5642 if (szLg2 == 0/*8 bit*/) {
5643 if (sxTo64) {
5644 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5645 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5646 } else {
5647 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5648 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5650 return True;
5652 /* else it's an invalid combination */
5654 after_LDRS_integer_register:
5656 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5657 /* This is the Unsigned offset variant only. The Post-Index and
5658 Pre-Index variants are below.
5660 31 29 23 21 9 4
5661 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5662 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5663 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5664 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5665 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5667 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5668 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5669 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5670 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5671 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5673 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5674 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5675 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5676 Bool isLD = INSN(22,22) == 1;
5677 UInt pimm12 = INSN(21,10) << szLg2;
5678 UInt nn = INSN(9,5);
5679 UInt tt = INSN(4,0);
5680 IRTemp tEA = newTemp(Ity_I64);
5681 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5682 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5683 if (isLD) {
5684 if (szLg2 < 4) {
5685 putQReg128(tt, mkV128(0x0000));
5687 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5688 } else {
5689 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5691 DIP("%s %s, [%s, #%u]\n",
5692 isLD ? "ldr" : "str",
5693 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5694 return True;
5697 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5698 /* These are the Post-Index and Pre-Index variants.
5700 31 29 23 20 11 9 4
5701 (at-Rn-then-Rn=EA)
5702 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5703 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5704 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5705 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5706 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5708 (at-EA-then-Rn=EA)
5709 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5710 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5711 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5712 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5713 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5715 Stores are the same except with bit 22 set to 0.
5717 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5718 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5719 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5720 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5721 Bool isLD = INSN(22,22) == 1;
5722 UInt imm9 = INSN(20,12);
5723 Bool atRN = INSN(11,11) == 0;
5724 UInt nn = INSN(9,5);
5725 UInt tt = INSN(4,0);
5726 IRTemp tRN = newTemp(Ity_I64);
5727 IRTemp tEA = newTemp(Ity_I64);
5728 IRTemp tTA = IRTemp_INVALID;
5729 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5730 ULong simm9 = sx_to_64(imm9, 9);
5731 assign(tRN, getIReg64orSP(nn));
5732 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5733 tTA = atRN ? tRN : tEA;
5735 /* Do early writeback for the cases typified by
5736 str d8, [sp, #-32]!
5737 str d10, [sp, #-128]!
5738 str q1, [sp, #-32]!
5739 for the same reasons as described in a similar comment in the
5740 "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
5742 Bool earlyWBack
5743 = !atRN && !isLD && (ty == Ity_F64 || ty == Ity_V128)
5744 && nn == 31 && ((Long)simm9) < 0;
5746 if (earlyWBack)
5747 putIReg64orSP(nn, mkexpr(tEA));
5749 if (isLD) {
5750 if (szLg2 < 4) {
5751 putQReg128(tt, mkV128(0x0000));
5753 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5754 } else {
5755 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5758 if (!earlyWBack)
5759 putIReg64orSP(nn, mkexpr(tEA));
5761 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5762 isLD ? "ldr" : "str",
5763 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5764 return True;
5767 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5768 /* 31 29 23 20 11 9 4
5769 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
5770 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
5771 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
5772 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
5773 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
5775 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
5776 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
5777 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
5778 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
5779 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
5781 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5782 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5783 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5784 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5785 Bool isLD = INSN(22,22) == 1;
5786 UInt imm9 = INSN(20,12);
5787 UInt nn = INSN(9,5);
5788 UInt tt = INSN(4,0);
5789 ULong simm9 = sx_to_64(imm9, 9);
5790 IRTemp tEA = newTemp(Ity_I64);
5791 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5792 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5793 if (isLD) {
5794 if (szLg2 < 4) {
5795 putQReg128(tt, mkV128(0x0000));
5797 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5798 } else {
5799 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5801 DIP("%s %s, [%s, #%lld]\n",
5802 isLD ? "ldur" : "stur",
5803 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5804 return True;
5807 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5808 /* 31 29 23 4
5809 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5810 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5811 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5813 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5814 UInt szB = 4 << INSN(31,30);
5815 UInt imm19 = INSN(23,5);
5816 UInt tt = INSN(4,0);
5817 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5818 IRType ty = preferredVectorSubTypeFromSize(szB);
5819 putQReg128(tt, mkV128(0x0000));
5820 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5821 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5822 return True;
5825 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */
5826 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5827 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5828 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5829 /* 31 29 26 22 21 20 15 11 9 4
5831 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5832 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5834 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5835 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5837 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5838 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5840 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
5841 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
5843 T = defined by Q and sz in the normal way
5844 step = if m == 11111 then transfer-size else Xm
5845 xx = case L of 1 -> LD ; 0 -> ST
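// Post-index example (illustrative): "ld4 {v0.16b-v3.16b}, [x1], #64"
// has Q=1, nRegs=4 and m=11111, so after the transfer X1 is advanced
// by the transfer size (isQ ? 16 : 8) * nRegs == 64; with a register
// form "..., x2" it would be advanced by X2 instead.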
5847 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5848 && INSN(21,21) == 0) {
5849 Bool bitQ = INSN(30,30);
5850 Bool isPX = INSN(23,23) == 1;
5851 Bool isLD = INSN(22,22) == 1;
5852 UInt mm = INSN(20,16);
5853 UInt opc = INSN(15,12);
5854 UInt sz = INSN(11,10);
5855 UInt nn = INSN(9,5);
5856 UInt tt = INSN(4,0);
5857 Bool isQ = bitQ == 1;
5858 Bool is1d = sz == BITS2(1,1) && !isQ;
5859 UInt nRegs = 0;
5860 switch (opc) {
5861 case BITS4(0,0,0,0): nRegs = 4; break;
5862 case BITS4(0,1,0,0): nRegs = 3; break;
5863 case BITS4(1,0,0,0): nRegs = 2; break;
5864 case BITS4(0,1,1,1): nRegs = 1; break;
5865 default: break;
5868 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5869 If we see it, set nRegs to 0 so as to cause the next conditional
5870 to fail. */
5871 if (!isPX && mm != 0)
5872 nRegs = 0;
5874 if (nRegs == 1 /* .1d is allowed */
5875 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5877 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5879 /* Generate the transfer address (TA) and if necessary the
5880 writeback address (WB) */
5881 IRTemp tTA = newTemp(Ity_I64);
5882 assign(tTA, getIReg64orSP(nn));
5883 if (nn == 31) { /* FIXME generate stack alignment check */ }
5884 IRTemp tWB = IRTemp_INVALID;
5885 if (isPX) {
5886 tWB = newTemp(Ity_I64);
5887 assign(tWB, binop(Iop_Add64,
5888 mkexpr(tTA),
5889 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5890 : getIReg64orZR(mm)));
5893 /* -- BEGIN generate the transfers -- */
5895 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5896 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5897 switch (nRegs) {
5898 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5899 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5900 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5901 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5902 default: vassert(0);
5905 /* -- Multiple 128 or 64 bit stores -- */
5906 if (!isLD) {
5907 switch (nRegs) {
5908 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5909 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5910 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5911 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5912 default: vassert(0);
5914 switch (nRegs) {
5915 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5916 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5917 break;
5918 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5919 (&i0, &i1, &i2, sz, u0, u1, u2);
5920 break;
5921 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5922 (&i0, &i1, sz, u0, u1);
5923 break;
5924 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5925 (&i0, sz, u0);
5926 break;
5927 default: vassert(0);
5929 # define MAYBE_NARROW_TO_64(_expr) \
5930 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5931 UInt step = isQ ? 16 : 8;
5932 switch (nRegs) {
5933 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5934 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5935 /* fallthru */
5936 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5937 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5938 /* fallthru */
5939 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5940 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5941 /* fallthru */
5942 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5943 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5944 break;
5945 default: vassert(0);
5947 # undef MAYBE_NARROW_TO_64
5950 /* -- Multiple 128 or 64 bit loads -- */
5951 else /* isLD */ {
5952 UInt step = isQ ? 16 : 8;
5953 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5954 # define MAYBE_WIDEN_FROM_64(_expr) \
5955 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5956 switch (nRegs) {
5957 case 4:
5958 assign(i3, MAYBE_WIDEN_FROM_64(
5959 loadLE(loadTy,
5960 binop(Iop_Add64, mkexpr(tTA),
5961 mkU64(3 * step)))));
5962 /* fallthru */
5963 case 3:
5964 assign(i2, MAYBE_WIDEN_FROM_64(
5965 loadLE(loadTy,
5966 binop(Iop_Add64, mkexpr(tTA),
5967 mkU64(2 * step)))));
5968 /* fallthru */
5969 case 2:
5970 assign(i1, MAYBE_WIDEN_FROM_64(
5971 loadLE(loadTy,
5972 binop(Iop_Add64, mkexpr(tTA),
5973 mkU64(1 * step)))));
5974 /* fallthru */
5975 case 1:
5976 assign(i0, MAYBE_WIDEN_FROM_64(
5977 loadLE(loadTy,
5978 binop(Iop_Add64, mkexpr(tTA),
5979 mkU64(0 * step)))));
5980 break;
5981 default:
5982 vassert(0);
5984 # undef MAYBE_WIDEN_FROM_64
5985 switch (nRegs) {
5986 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5987 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5988 break;
5989 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5990 (&u0, &u1, &u2, sz, i0, i1, i2);
5991 break;
5992 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5993 (&u0, &u1, sz, i0, i1);
5994 break;
5995 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5996 (&u0, sz, i0);
5997 break;
5998 default: vassert(0);
6000 switch (nRegs) {
6001 case 4: putQReg128( (tt+3) % 32,
6002 math_MAYBE_ZERO_HI64(bitQ, u3));
6003 /* fallthru */
6004 case 3: putQReg128( (tt+2) % 32,
6005 math_MAYBE_ZERO_HI64(bitQ, u2));
6006 /* fallthru */
6007 case 2: putQReg128( (tt+1) % 32,
6008 math_MAYBE_ZERO_HI64(bitQ, u1));
6009 /* fallthru */
6010 case 1: putQReg128( (tt+0) % 32,
6011 math_MAYBE_ZERO_HI64(bitQ, u0));
6012 break;
6013 default: vassert(0);
6017 /* -- END generate the transfers -- */
6019 /* Do the writeback, if necessary */
6020 if (isPX) {
6021 putIReg64orSP(nn, mkexpr(tWB));
6024 HChar pxStr[20];
6025 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6026 if (isPX) {
6027 if (mm == BITS5(1,1,1,1,1))
6028 vex_sprintf(pxStr, ", #%u", xferSzB);
6029 else
6030 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6032 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6033 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6034 isLD ? "ld" : "st", nRegs,
6035 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6036 pxStr);
6038 return True;
6040 /* else fall through */
6043 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
6044 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
6045 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
6046 /* 31 29 26 22 21 20 15 11 9 4
6048 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6049 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6051 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6052 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6054 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6055 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6057 T = defined by Q and sz in the normal way
6058 step = if m == 11111 then transfer-size else Xm
6059 xx = case L of 1 -> LD ; 0 -> ST
6061 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6062 && INSN(21,21) == 0) {
6063 Bool bitQ = INSN(30,30);
6064 Bool isPX = INSN(23,23) == 1;
6065 Bool isLD = INSN(22,22) == 1;
6066 UInt mm = INSN(20,16);
6067 UInt opc = INSN(15,12);
6068 UInt sz = INSN(11,10);
6069 UInt nn = INSN(9,5);
6070 UInt tt = INSN(4,0);
6071 Bool isQ = bitQ == 1;
6072 UInt nRegs = 0;
6073 switch (opc) {
6074 case BITS4(0,0,1,0): nRegs = 4; break;
6075 case BITS4(0,1,1,0): nRegs = 3; break;
6076 case BITS4(1,0,1,0): nRegs = 2; break;
6077 default: break;
6080 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6081 If we see it, set nRegs to 0 so as to cause the next conditional
6082 to fail. */
6083 if (!isPX && mm != 0)
6084 nRegs = 0;
6086 if (nRegs >= 2 && nRegs <= 4) {
6088 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6090 /* Generate the transfer address (TA) and if necessary the
6091 writeback address (WB) */
6092 IRTemp tTA = newTemp(Ity_I64);
6093 assign(tTA, getIReg64orSP(nn));
6094 if (nn == 31) { /* FIXME generate stack alignment check */ }
6095 IRTemp tWB = IRTemp_INVALID;
6096 if (isPX) {
6097 tWB = newTemp(Ity_I64);
6098 assign(tWB, binop(Iop_Add64,
6099 mkexpr(tTA),
6100 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6101 : getIReg64orZR(mm)));
6104 /* -- BEGIN generate the transfers -- */
6106 IRTemp u0, u1, u2, u3;
6107 u0 = u1 = u2 = u3 = IRTemp_INVALID;
6108 switch (nRegs) {
6109 case 4: u3 = newTempV128(); /* fallthru */
6110 case 3: u2 = newTempV128(); /* fallthru */
6111 case 2: u1 = newTempV128();
6112 u0 = newTempV128(); break;
6113 default: vassert(0);
6116 /* -- Multiple 128 or 64 bit stores -- */
6117 if (!isLD) {
6118 switch (nRegs) {
6119 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6120 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6121 case 2: assign(u1, getQReg128((tt+1) % 32));
6122 assign(u0, getQReg128((tt+0) % 32)); break;
6123 default: vassert(0);
6125 # define MAYBE_NARROW_TO_64(_expr) \
6126 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6127 UInt step = isQ ? 16 : 8;
6128 switch (nRegs) {
6129 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6130 MAYBE_NARROW_TO_64(mkexpr(u3)) );
6131 /* fallthru */
6132 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6133 MAYBE_NARROW_TO_64(mkexpr(u2)) );
6134 /* fallthru */
6135 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6136 MAYBE_NARROW_TO_64(mkexpr(u1)) );
6137 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6138 MAYBE_NARROW_TO_64(mkexpr(u0)) );
6139 break;
6140 default: vassert(0);
6142 # undef MAYBE_NARROW_TO_64
6145 /* -- Multiple 128 or 64 bit loads -- */
6146 else /* isLD */ {
6147 UInt step = isQ ? 16 : 8;
6148 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6149 # define MAYBE_WIDEN_FROM_64(_expr) \
6150 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6151 switch (nRegs) {
6152 case 4:
6153 assign(u3, MAYBE_WIDEN_FROM_64(
6154 loadLE(loadTy,
6155 binop(Iop_Add64, mkexpr(tTA),
6156 mkU64(3 * step)))));
6157 /* fallthru */
6158 case 3:
6159 assign(u2, MAYBE_WIDEN_FROM_64(
6160 loadLE(loadTy,
6161 binop(Iop_Add64, mkexpr(tTA),
6162 mkU64(2 * step)))));
6163 /* fallthru */
6164 case 2:
6165 assign(u1, MAYBE_WIDEN_FROM_64(
6166 loadLE(loadTy,
6167 binop(Iop_Add64, mkexpr(tTA),
6168 mkU64(1 * step)))));
6169 assign(u0, MAYBE_WIDEN_FROM_64(
6170 loadLE(loadTy,
6171 binop(Iop_Add64, mkexpr(tTA),
6172 mkU64(0 * step)))));
6173 break;
6174 default:
6175 vassert(0);
6177 # undef MAYBE_WIDEN_FROM_64
6178 switch (nRegs) {
6179 case 4: putQReg128( (tt+3) % 32,
6180 math_MAYBE_ZERO_HI64(bitQ, u3));
6181 /* fallthru */
6182 case 3: putQReg128( (tt+2) % 32,
6183 math_MAYBE_ZERO_HI64(bitQ, u2));
6184 /* fallthru */
6185 case 2: putQReg128( (tt+1) % 32,
6186 math_MAYBE_ZERO_HI64(bitQ, u1));
6187 putQReg128( (tt+0) % 32,
6188 math_MAYBE_ZERO_HI64(bitQ, u0));
6189 break;
6190 default: vassert(0);
6194 /* -- END generate the transfers -- */
6196 /* Do the writeback, if necessary */
6197 if (isPX) {
6198 putIReg64orSP(nn, mkexpr(tWB));
6201 HChar pxStr[20];
6202 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6203 if (isPX) {
6204 if (mm == BITS5(1,1,1,1,1))
6205 vex_sprintf(pxStr, ", #%u", xferSzB);
6206 else
6207 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6209 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6210 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6211 isLD ? "ld" : "st",
6212 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6213 pxStr);
6215 return True;
6217 /* else fall through */
6220 /* ---------- LD1R (single structure, replicate) ---------- */
6221 /* ---------- LD2R (single structure, replicate) ---------- */
6222 /* ---------- LD3R (single structure, replicate) ---------- */
6223 /* ---------- LD4R (single structure, replicate) ---------- */
6224 /* 31 29 22 20 15 11 9 4
6225 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6226 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6228 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6229 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6231 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6232 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6234 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6235 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6237 step = if m == 11111 then transfer-size else Xm
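// Illustrative example: "ld2r {v4.8h, v5.8h}, [x0]" loads the 16-bit
// elements at [x0] and [x0+2], replicates each across every lane of
// v4 and v5 respectively, and (since Q=1 here) keeps all 128 bits;
// the .4h form would instead zero the upper 64 bits of each register.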
6239 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6240 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6241 && INSN(12,12) == 0) {
6242 UInt bitQ = INSN(30,30);
6243 Bool isPX = INSN(23,23) == 1;
6244 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6245 UInt mm = INSN(20,16);
6246 UInt sz = INSN(11,10);
6247 UInt nn = INSN(9,5);
6248 UInt tt = INSN(4,0);
6250 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6251 if (isPX || mm == 0) {
6253 IRType ty = integerIRTypeOfSize(1 << sz);
6255 UInt laneSzB = 1 << sz;
6256 UInt xferSzB = laneSzB * nRegs;
6258 /* Generate the transfer address (TA) and if necessary the
6259 writeback address (WB) */
6260 IRTemp tTA = newTemp(Ity_I64);
6261 assign(tTA, getIReg64orSP(nn));
6262 if (nn == 31) { /* FIXME generate stack alignment check */ }
6263 IRTemp tWB = IRTemp_INVALID;
6264 if (isPX) {
6265 tWB = newTemp(Ity_I64);
6266 assign(tWB, binop(Iop_Add64,
6267 mkexpr(tTA),
6268 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6269 : getIReg64orZR(mm)));
6272 /* Do the writeback, if necessary */
6273 if (isPX) {
6274 putIReg64orSP(nn, mkexpr(tWB));
6277 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6278 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6279 switch (nRegs) {
6280 case 4:
6281 e3 = newTemp(ty);
6282 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6283 mkU64(3 * laneSzB))));
6284 v3 = math_DUP_TO_V128(e3, ty);
6285 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6286 /* fallthrough */
6287 case 3:
6288 e2 = newTemp(ty);
6289 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6290 mkU64(2 * laneSzB))));
6291 v2 = math_DUP_TO_V128(e2, ty);
6292 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6293 /* fallthrough */
6294 case 2:
6295 e1 = newTemp(ty);
6296 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6297 mkU64(1 * laneSzB))));
6298 v1 = math_DUP_TO_V128(e1, ty);
6299 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6300 /* fallthrough */
6301 case 1:
6302 e0 = newTemp(ty);
6303 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6304 mkU64(0 * laneSzB))));
6305 v0 = math_DUP_TO_V128(e0, ty);
6306 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6307 break;
6308 default:
6309 vassert(0);
6312 HChar pxStr[20];
6313 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6314 if (isPX) {
6315 if (mm == BITS5(1,1,1,1,1))
6316 vex_sprintf(pxStr, ", #%u", xferSzB);
6317 else
6318 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6320 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6321 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6322 nRegs,
6323 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6324 pxStr);
6326 return True;
6328 /* else fall through */
6331 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6332 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6333 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6334 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6335 /* 31 29 22 21 20 15 11 9 4
6336 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6337 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6339 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6340 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6342 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6343 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6345 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6346 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6348 step = if m == 11111 then transfer-size else Xm
6349 op = case L of 1 -> LD ; 0 -> ST
6351 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6352 01:b:b:b0 -> 2, bbb
6353 10:b:b:00 -> 4, bb
6354 10:b:0:01 -> 8, b
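// Decode example (illustrative): for a byte lane (xx == 00) the index
// is simply q:S:sz read as a 4-bit number, so "ld1 {v2.b}[13], [x0]"
// has q=1, S=1, sz=01 and hence ix == 13 with laneSzB == 1.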
6356 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6357 UInt bitQ = INSN(30,30);
6358 Bool isPX = INSN(23,23) == 1;
6359 Bool isLD = INSN(22,22) == 1;
6360 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6361 UInt mm = INSN(20,16);
6362 UInt xx = INSN(15,14);
6363 UInt bitS = INSN(12,12);
6364 UInt sz = INSN(11,10);
6365 UInt nn = INSN(9,5);
6366 UInt tt = INSN(4,0);
6368 Bool valid = True;
6370 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6371 if (!isPX && mm != 0)
6372 valid = False;
6374 UInt laneSzB = 0; /* invalid */
6375 UInt ix = 16; /* invalid */
6377 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6378 switch (xx_q_S_sz) {
6379 case 0x00: case 0x01: case 0x02: case 0x03:
6380 case 0x04: case 0x05: case 0x06: case 0x07:
6381 case 0x08: case 0x09: case 0x0A: case 0x0B:
6382 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6383 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6384 break;
6385 case 0x10: case 0x12: case 0x14: case 0x16:
6386 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6387 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6388 break;
6389 case 0x20: case 0x24: case 0x28: case 0x2C:
6390 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6391 break;
6392 case 0x21: case 0x29:
6393 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6394 break;
6395 default:
6396 break;
6399 if (valid && laneSzB != 0) {
6401 IRType ty = integerIRTypeOfSize(laneSzB);
6402 UInt xferSzB = laneSzB * nRegs;
6404 /* Generate the transfer address (TA) and if necessary the
6405 writeback address (WB) */
6406 IRTemp tTA = newTemp(Ity_I64);
6407 assign(tTA, getIReg64orSP(nn));
6408 if (nn == 31) { /* FIXME generate stack alignment check */ }
6409 IRTemp tWB = IRTemp_INVALID;
6410 if (isPX) {
6411 tWB = newTemp(Ity_I64);
6412 assign(tWB, binop(Iop_Add64,
6413 mkexpr(tTA),
6414 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6415 : getIReg64orZR(mm)));
6418 /* Do the writeback, if necessary */
6419 if (isPX) {
6420 putIReg64orSP(nn, mkexpr(tWB));
6423 switch (nRegs) {
6424 case 4: {
6425 IRExpr* addr
6426 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6427 if (isLD) {
6428 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6429 } else {
6430 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6433 /* fallthrough */
6434 case 3: {
6435 IRExpr* addr
6436 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6437 if (isLD) {
6438 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6439 } else {
6440 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6443 /* fallthrough */
6444 case 2: {
6445 IRExpr* addr
6446 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6447 if (isLD) {
6448 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6449 } else {
6450 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6453 /* fallthrough */
6454 case 1: {
6455 IRExpr* addr
6456 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6457 if (isLD) {
6458 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6459 } else {
6460 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6462 break;
6464 default:
6465 vassert(0);
6468 HChar pxStr[20];
6469 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6470 if (isPX) {
6471 if (mm == BITS5(1,1,1,1,1))
6472 vex_sprintf(pxStr, ", #%u", xferSzB);
6473 else
6474 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6476 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6477 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6478 isLD ? "ld" : "st", nRegs,
6479 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6480 ix, nameIReg64orSP(nn), pxStr);
6482 return True;
6484 /* else fall through */
6487 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6488 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6489 /* 31 29 23 20 14 9 4
6490 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6491 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6492 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6493 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6495 /* For the "standard" implementation we pass through the LL and SC to
6496 the host. For the "fallback" implementation, for details see
6497 https://bugs.kde.org/show_bug.cgi?id=344524 and
6498 https://bugs.kde.org/show_bug.cgi?id=369459,
6499 but in short:
6501 LoadLinked(addr)
6502 gs.LLsize = load_size // 1, 2, 4 or 8
6503 gs.LLaddr = addr
6504 gs.LLdata = zeroExtend(*addr)
6506 StoreCond(addr, data)
6507 tmp_LLsize = gs.LLsize
6508 gs.LLsize = 0 // "no transaction"
6509 if tmp_LLsize != store_size -> fail
6510 if addr != gs.LLaddr -> fail
6511 if zeroExtend(*addr) != gs.LLdata -> fail
6512 cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6513 if !cas_ok -> fail
6514 succeed
6516 When thread scheduled
6517 gs.LLsize = 0 // "no transaction"
6518 (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
6519 has to do this bit)
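// As a concrete sketch of the fallback path implemented below, an
// "stxr w0, w1, [x2]" is lowered roughly as:
//    W0 = 1                                   // assume failure
//    exit-to-next-insn if LLsize != 4
//    exit-to-next-insn if X2 != LLaddr
//    exit-to-next-insn if 32-bit load at X2 != LLdata
//    CAS(X2: LLdata -> W1); exit-to-next-insn if the CAS failed
//    W0 = 0                                   // success
// with LLsize cleared to 0 ("no transaction") before the checks.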
6521 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6522 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6523 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6524 UInt szBlg2 = INSN(31,30);
6525 Bool isLD = INSN(22,22) == 1;
6526 Bool isAcqOrRel = INSN(15,15) == 1;
6527 UInt ss = INSN(20,16);
6528 UInt nn = INSN(9,5);
6529 UInt tt = INSN(4,0);
6531 vassert(szBlg2 < 4);
6532 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6533 IRType ty = integerIRTypeOfSize(szB);
6534 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6536 IRTemp ea = newTemp(Ity_I64);
6537 assign(ea, getIReg64orSP(nn));
6538 /* FIXME generate check that ea is szB-aligned */
6540 if (isLD && ss == BITS5(1,1,1,1,1)) {
6541 IRTemp res = newTemp(ty);
6542 if (abiinfo->guest__use_fallback_LLSC) {
6543 // Do the load first so we don't update any guest state
6544 // if it faults.
6545 IRTemp loaded_data64 = newTemp(Ity_I64);
6546 assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
6547 stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
6548 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6549 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
6550 putIReg64orZR(tt, mkexpr(loaded_data64));
6551 } else {
6552 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6553 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6555 if (isAcqOrRel) {
6556 stmt(IRStmt_MBE(Imbe_Fence));
6558 DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6559 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6560 abiinfo->guest__use_fallback_LLSC
6561 ? "(fallback implementation)" : "");
6562 return True;
6564 if (!isLD) {
6565 if (isAcqOrRel) {
6566 stmt(IRStmt_MBE(Imbe_Fence));
6568 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6569 if (abiinfo->guest__use_fallback_LLSC) {
6570 // This is really ugly, since we don't have any way to do
6571 // proper if-then-else. First, set up as if the SC failed,
6572 // and jump forwards if it really has failed.
6574 // Continuation address
6575 IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6577 // "the SC failed". Any non-zero value means failure.
6578 putIReg64orZR(ss, mkU64(1));
6580 IRTemp tmp_LLsize = newTemp(Ity_I64);
6581 assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6582 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6584 // Fail if no or wrong-size transaction
6585 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
6586 stmt( IRStmt_Exit(
6587 binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
6588 Ijk_Boring, nia, OFFB_PC
6590 // Fail if the address doesn't match the LL address
6591 stmt( IRStmt_Exit(
6592 binop(Iop_CmpNE64, mkexpr(ea),
6593 IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6594 Ijk_Boring, nia, OFFB_PC
6596 // Fail if the data doesn't match the LL data
6597 IRTemp llsc_data64 = newTemp(Ity_I64);
6598 assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
6599 stmt( IRStmt_Exit(
6600 binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
6601 mkexpr(llsc_data64)),
6602 Ijk_Boring, nia, OFFB_PC
6604 // Try to CAS the new value in.
6605 IRTemp old = newTemp(ty);
6606 IRTemp expd = newTemp(ty);
6607 assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
6608 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6609 Iend_LE, mkexpr(ea),
6610 /*expdHi*/NULL, mkexpr(expd),
6611 /*dataHi*/NULL, data
6612 )));
6613 // Fail if the CAS failed (viz, old != expd)
6614 stmt( IRStmt_Exit(
6615 binop(Iop_CmpNE64,
6616 widenUto64(ty, mkexpr(old)),
6617 widenUto64(ty, mkexpr(expd))),
6618 Ijk_Boring, nia, OFFB_PC
6620 // Otherwise we succeeded (!)
6621 putIReg64orZR(ss, mkU64(0));
6622 } else {
6623 IRTemp res = newTemp(Ity_I1);
6624 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6625 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6626 Need to set rS to 1 on failure, 0 on success. */
6627 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6628 mkU64(1)));
6630 DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6631 nameIRegOrZR(False, ss),
6632 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6633 abiinfo->guest__use_fallback_LLSC
6634 ? "(fallback implementation)" : "");
6635 return True;
6637 /* else fall through */
6640 /* ------------------ LDA{R,RH,RB} ------------------ */
6641 /* ------------------ STL{R,RH,RB} ------------------ */
6642 /* 31 29 23 20 14 9 4
6643 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6644 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6646 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6647 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6648 UInt szBlg2 = INSN(31,30);
6649 Bool isLD = INSN(22,22) == 1;
6650 UInt nn = INSN(9,5);
6651 UInt tt = INSN(4,0);
6653 vassert(szBlg2 < 4);
6654 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6655 IRType ty = integerIRTypeOfSize(szB);
6656 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6658 IRTemp ea = newTemp(Ity_I64);
6659 assign(ea, getIReg64orSP(nn));
6660 /* FIXME generate check that ea is szB-aligned */
6662 if (isLD) {
6663 IRTemp res = newTemp(ty);
6664 assign(res, loadLE(ty, mkexpr(ea)));
6665 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6666 stmt(IRStmt_MBE(Imbe_Fence));
6667 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6668 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6669 } else {
6670 stmt(IRStmt_MBE(Imbe_Fence));
6671 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6672 storeLE(mkexpr(ea), data);
6673 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6674 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6676 return True;
6679 /* The PRFM cases that follow may allow Rt values (the
6680 prefetch operation) which are not allowed by the documentation.
6681 This should be looked into. */
6682 /* ------------------ PRFM (immediate) ------------------ */
6683 /* 31 21 9 4
6684 11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
6686 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6687 UInt imm12 = INSN(21,10);
6688 UInt nn = INSN(9,5);
6689 UInt tt = INSN(4,0);
6690 /* Generating any IR here is pointless, except for documentation
6691 purposes, as it will get optimised away later. */
6692 IRTemp ea = newTemp(Ity_I64);
6693 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6694 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6695 return True;
6698 /* ------------------ PRFM (register) ------------------ */
6699 /* 31 29 22 20 15 12 11 9 4
6700 11 1110001 01 Rm opt S 10 Rn Rt PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6702 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6703 && INSN(11,10) == BITS2(1,0)) {
6704 HChar dis_buf[64];
6705 UInt tt = INSN(4,0);
6706 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6707 if (ea != IRTemp_INVALID) {
6708 /* No actual code to generate. */
6709 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6710 return True;
6714 /* ------------------ PRFM (unscaled offset) ------------------ */
6715 /* 31 29 22 20 11 9 4
6716 11 1110001 00 imm9 00 Rn Rt PRFM prfop=Rt, [Xn|SP, #simm]
6718 if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
6719 && INSN(11,10) == BITS2(0,0)) {
6720 ULong imm9 = INSN(20,12);
6721 UInt nn = INSN(9,5);
6722 UInt tt = INSN(4,0);
6723 ULong offset = sx_to_64(imm9, 9);
6724 IRTemp ea = newTemp(Ity_I64);
6725 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
6726 /* No actual code to generate. */
6727 DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
6728 return True;
6731 /* ---------------- ARMv8.1-LSE: Atomic Memory Operations ---------------- */
6732 /* 31 29 23 22 21 20 15 11 9 4
6733 sz 111000 A R 1 s 0000 00 n t LDADD{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6734 sz 111000 A R 1 s 0001 00 n t LDCLR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6735 sz 111000 A R 1 s 0010 00 n t LDEOR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6736 sz 111000 A R 1 s 0011 00 n t LDSET{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6737 sz 111000 A R 1 s 0100 00 n t LDSMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6738 sz 111000 A R 1 s 0101 00 n t LDSMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6739 sz 111000 A R 1 s 0110 00 n t LDUMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6740 sz 111000 A R 1 s 0111 00 n t LDUMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6741 sz 111000 A R 1 s 1000 00 n t SWP{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
6743 if (INSN(29,24) == BITS6(1,1,1,0,0,0)
6744 && INSN(21,21) == 1
6745 && (INSN(15,12) <= BITS4(1,0,0,0))
6746 && INSN(11,10) == BITS2(0,0)) {
6747 UInt szBlg2 = INSN(31,30);
6748 Bool isAcq = INSN(23,23) == 1;
6749 Bool isRel = INSN(22,22) == 1;
6750 UInt ss = INSN(20,16);
6751 UInt opc = INSN(15,12);
6752 UInt nn = INSN(9,5);
6753 UInt tt = INSN(4,0);
6755 const HChar* nm = NULL;
6756 const HChar* suffix[4] = { "b", "h", "", "" };
6758 vassert(szBlg2 < 4);
6759 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 bytes*/
6760 IRType ty = integerIRTypeOfSize(szB);
6761 Bool is64 = szB == 8;
6762 Bool isSigned = (opc == 4) || (opc == 5) /*smax || smin*/;
6764 // IR used to emulate these atomic memory ops:
6765 // 1) barrier
6766 // 2) load
6767 // 3) widen operands and do arithmetic/logic op
6768 // 4) cas to see if target memory updated
6769 // 5) barrier
6770 // 6) repeat from 1) if cas says target memory not updated
6771 // 7) update register
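// Illustrative sketch of the scheme above: "ldadd w5, w7, [x2]" loads
// the old 32-bit value at [x2], computes old + W5, tries to CAS the
// sum back in, branches back to this same instruction if the CAS saw
// interference, and finally writes the old value to W7.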
6773 IRTemp ea = newTemp(Ity_I64);
6774 assign(ea, getIReg64orSP(nn));
6776 // Insert barrier before loading for acquire and acquire-release variants:
6777 // A and AL.
6778 if (isAcq && (tt != 31))
6779 stmt(IRStmt_MBE(Imbe_Fence));
6781 // Load LHS from memory, RHS from register.
6782 IRTemp orig = newTemp(ty);
6783 assign(orig, loadLE(ty, mkexpr(ea)));
6784 IRExpr *lhs = mkexpr(orig);
6785 IRExpr *rhs = narrowFrom64(ty, getIReg64orZR(ss));
6786 IRExpr *res = NULL;
6788 lhs = isSigned ? widenSto64(ty, lhs) : widenUto64(ty, lhs);
6789 rhs = isSigned ? widenSto64(ty, rhs) : widenUto64(ty, rhs);
6791 // Perform the operation.
6792 switch (opc) {
6793 case 0:
6794 nm = "ldadd";
6795 res = binop(Iop_Add64, lhs, rhs);
6796 break;
6797 case 1:
6798 nm = "ldclr";
6799 res = binop(Iop_And64, lhs, unop(mkNOT(Ity_I64), rhs));
6800 break;
6801 case 2:
6802 nm = "ldeor";
6803 res = binop(Iop_Xor64, lhs, rhs);
6804 break;
6805 case 3:
6806 nm = "ldset";
6807 res = binop(Iop_Or64, lhs, rhs);
6808 break;
6809 case 4:
6810 nm = "ldsmax";
6811 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), rhs, lhs);
6812 break;
6813 case 5:
6814 nm = "ldsmin";
6815 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), lhs, rhs);
6816 break;
6817 case 6:
6818 nm = "ldumax";
6819 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), rhs, lhs);
6820 break;
6821 case 7:
6822 nm = "ldumin";
6823 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), lhs, rhs);
6824 break;
6825 case 8:
6826 nm = "swp";
6827 res = lhs;
6828 break;
6829 default:
6830 vassert(0);
6831 break;
6834 // Store the result back if LHS remains unchanged in memory.
6835 IRTemp old = newTemp(ty);
6836 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6837 Iend_LE, mkexpr(ea),
6838 /*expdHi*/NULL, mkexpr(orig),
6839 /*dataHi*/NULL, narrowFrom64(ty, res))) );
6841 // Insert barrier after storing for release and acquire-release variants:
6842 // L and AL.
6843 if (isRel)
6844 stmt(IRStmt_MBE(Imbe_Fence));
6846 // Retry if the CAS failed (i.e. when old != orig).
6847 IRConst* nia = IRConst_U64(guest_PC_curr_instr);
6848 stmt( IRStmt_Exit(
6849 binop(Iop_CasCmpNE64,
6850 widenUto64(ty, mkexpr(old)),
6851 widenUto64(ty, mkexpr(orig))),
6852 Ijk_Boring, nia, OFFB_PC ));
6853 // Otherwise we succeeded.
6854 putIReg64orZR(tt, widenUto64(ty, mkexpr(old)));
6856 DIP("%s%s%s%s %s, %s, [%s]\n", nm, isAcq ? "a" : "", isRel ? "l" : "",
6857 suffix[szBlg2], nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt),
6858 nameIReg64orSP(nn));
6859 return True;
6862 vex_printf("ARM64 front end: load_store\n");
6863 return False;
6864 # undef INSN
6868 /*------------------------------------------------------------*/
6869 /*--- Control flow and misc instructions ---*/
6870 /*------------------------------------------------------------*/
6872 static
6873 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6874 const VexArchInfo* archinfo,
6875 const VexAbiInfo* abiinfo)
6877 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6879 /* ---------------------- B cond ----------------------- */
6880 /* 31 24 4 3
6881 0101010 0 imm19 0 cond */
6882 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6883 UInt cond = INSN(3,0);
6884 ULong uimm64 = INSN(23,5) << 2;
6885 Long simm64 = (Long)sx_to_64(uimm64, 21);
6886 vassert(dres->whatNext == Dis_Continue);
6887 vassert(dres->len == 4);
6888 vassert(dres->jk_StopHere == Ijk_INVALID);
6889 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6890 Ijk_Boring,
6891 IRConst_U64(guest_PC_curr_instr + simm64),
6892 OFFB_PC) );
6893 putPC(mkU64(guest_PC_curr_instr + 4));
6894 dres->whatNext = Dis_StopHere;
6895 dres->jk_StopHere = Ijk_Boring;
6896 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6897 return True;
6900 /* -------------------- B{L} uncond -------------------- */
6901 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6902 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
6903 100101 imm26 BL (PC + sxTo64(imm26 << 2))
6905 UInt bLink = INSN(31,31);
6906 ULong uimm64 = INSN(25,0) << 2;
6907 Long simm64 = (Long)sx_to_64(uimm64, 28);
6908 if (bLink) {
6909 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6911 putPC(mkU64(guest_PC_curr_instr + simm64));
6912 dres->whatNext = Dis_StopHere;
6913 dres->jk_StopHere = Ijk_Call;
6914 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6915 guest_PC_curr_instr + simm64);
6916 return True;
6919 /* --------------------- B{L} reg --------------------- */
6920 /* 31 24 22 20 15 9 4
6921 1101011 00 10 11111 000000 nn 00000 RET Rn
6922 1101011 00 01 11111 000000 nn 00000 CALL Rn
6923 1101011 00 00 11111 000000 nn 00000 JMP Rn
6925 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6926 && INSN(20,16) == BITS5(1,1,1,1,1)
6927 && INSN(15,10) == BITS6(0,0,0,0,0,0)
6928 && INSN(4,0) == BITS5(0,0,0,0,0)) {
6929 UInt branch_type = INSN(22,21);
6930 UInt nn = INSN(9,5);
6931 if (branch_type == BITS2(1,0) /* RET */) {
6932 putPC(getIReg64orZR(nn));
6933 dres->whatNext = Dis_StopHere;
6934 dres->jk_StopHere = Ijk_Ret;
6935 DIP("ret %s\n", nameIReg64orZR(nn));
6936 return True;
6938 if (branch_type == BITS2(0,1) /* CALL */) {
6939 IRTemp dst = newTemp(Ity_I64);
6940 assign(dst, getIReg64orZR(nn));
6941 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6942 putPC(mkexpr(dst));
6943 dres->whatNext = Dis_StopHere;
6944 dres->jk_StopHere = Ijk_Call;
6945 DIP("blr %s\n", nameIReg64orZR(nn));
6946 return True;
6948 if (branch_type == BITS2(0,0) /* JMP */) {
6949 putPC(getIReg64orZR(nn));
6950 dres->whatNext = Dis_StopHere;
6951 dres->jk_StopHere = Ijk_Boring;
6952 DIP("jmp %s\n", nameIReg64orZR(nn));
6953 return True;
6957 /* -------------------- CB{N}Z -------------------- */
6958 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6959 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6961 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6962 Bool is64 = INSN(31,31) == 1;
6963 Bool bIfZ = INSN(24,24) == 0;
6964 ULong uimm64 = INSN(23,5) << 2;
6965 UInt rT = INSN(4,0);
6966 Long simm64 = (Long)sx_to_64(uimm64, 21);
6967 IRExpr* cond = NULL;
6968 if (is64) {
6969 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6970 getIReg64orZR(rT), mkU64(0));
6971 } else {
6972 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6973 getIReg32orZR(rT), mkU32(0));
6975 stmt( IRStmt_Exit(cond,
6976 Ijk_Boring,
6977 IRConst_U64(guest_PC_curr_instr + simm64),
6978 OFFB_PC) );
6979 putPC(mkU64(guest_PC_curr_instr + 4));
6980 dres->whatNext = Dis_StopHere;
6981 dres->jk_StopHere = Ijk_Boring;
6982 DIP("cb%sz %s, 0x%llx\n",
6983 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6984 guest_PC_curr_instr + simm64);
6985 return True;
6988 /* -------------------- TB{N}Z -------------------- */
6989 /* 31 30 24 23 18 5 4
6990 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6991 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6993 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6994 UInt b5 = INSN(31,31);
6995 Bool bIfZ = INSN(24,24) == 0;
6996 UInt b40 = INSN(23,19);
6997 UInt imm14 = INSN(18,5);
6998 UInt tt = INSN(4,0);
6999 UInt bitNo = (b5 << 5) | b40;
7000 ULong uimm64 = imm14 << 2;
7001 Long simm64 = sx_to_64(uimm64, 16);
7002 IRExpr* cond
7003 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
7004 binop(Iop_And64,
7005 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
7006 mkU64(1)),
7007 mkU64(0));
7008 stmt( IRStmt_Exit(cond,
7009 Ijk_Boring,
7010 IRConst_U64(guest_PC_curr_instr + simm64),
7011 OFFB_PC) );
7012 putPC(mkU64(guest_PC_curr_instr + 4));
7013 dres->whatNext = Dis_StopHere;
7014 dres->jk_StopHere = Ijk_Boring;
7015 DIP("tb%sz %s, #%u, 0x%llx\n",
7016 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
7017 guest_PC_curr_instr + simm64);
7018 return True;
7021 /* -------------------- SVC -------------------- */
7022 /* 11010100 000 imm16 000 01
7023 Don't bother with anything except the imm16==0 case.
7025 if (INSN(31,0) == 0xD4000001) {
7026 putPC(mkU64(guest_PC_curr_instr + 4));
7027 dres->whatNext = Dis_StopHere;
7028 dres->jk_StopHere = Ijk_Sys_syscall;
7029 DIP("svc #0\n");
7030 return True;
7033 /* ------------------ M{SR,RS} ------------------ */
7034 /* ---- Cases for TPIDR_EL0 ----
7035 0xD51BD0 010 Rt MSR tpidr_el0, rT
7036 0xD53BD0 010 Rt MRS rT, tpidr_el0
7038 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
7039 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
7040 Bool toSys = INSN(21,21) == 0;
7041 UInt tt = INSN(4,0);
7042 if (toSys) {
7043 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
7044 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
7045 } else {
7046 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
7047 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
7049 return True;
7051 /* ---- Cases for FPCR ----
7052 0xD51B44 000 Rt MSR fpcr, rT
7053 0xD53B44 000 Rt MRS rT, fpcr
7055 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
7056 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
7057 Bool toSys = INSN(21,21) == 0;
7058 UInt tt = INSN(4,0);
7059 if (toSys) {
7060 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
7061 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
7062 } else {
7063 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
7064 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
7066 return True;
7068 /* ---- Cases for FPSR ----
7069 0xD51B44 001 Rt MSR fpsr, rT
7070 0xD53B44 001 Rt MRS rT, fpsr
7071 The only part of this we model is FPSR.QC. All other bits
7072 are ignored when writing to it and RAZ when reading from it.
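// In other words (sketch): "msr fpsr, x0" looks only at bit 27 (QC)
// of X0, and "mrs x0, fpsr" yields either 0 or 1 << 27, depending on
// whether the vector-saturation flag has been set since it was last
// cleared.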
7074 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
7075 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
7076 Bool toSys = INSN(21,21) == 0;
7077 UInt tt = INSN(4,0);
7078 if (toSys) {
7079 /* Just deal with FPSR.QC. Make up a V128 value which is
7080 zero if Xt[27] is zero and any other value if Xt[27] is
7081 nonzero. */
7082 IRTemp qc64 = newTemp(Ity_I64);
7083 assign(qc64, binop(Iop_And64,
7084 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
7085 mkU64(1)));
7086 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
7087 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
7088 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
7089 } else {
7090 /* Generate a value which is all zeroes except for bit 27,
7091 which must be zero if QCFLAG is all zeroes and one otherwise. */
7092 IRTemp qcV128 = newTempV128();
7093 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
7094 IRTemp qc64 = newTemp(Ity_I64);
7095 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
7096 unop(Iop_V128to64, mkexpr(qcV128))));
7097 IRExpr* res = binop(Iop_Shl64,
7098 unop(Iop_1Uto64,
7099 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
7100 mkU8(27));
7101 putIReg64orZR(tt, res);
7102 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
7104 return True;
7106 /* ---- Cases for NZCV ----
7107 D51B42 000 Rt MSR nzcv, rT
7108 D53B42 000 Rt MRS rT, nzcv
7109 The only parts of NZCV that actually exist are bits 31:28, which
7110 are the N Z C and V bits themselves. Hence the flags thunk provides
7111 all the state we need.
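// So, roughly: "msr nzcv, x5" keeps only bits 31:28 of X5 (mask
// 0xF0000000) and copies them into the flags thunk, while
// "mrs x5, nzcv" recomputes N,Z,C,V and delivers them in bits 31:28
// with all other bits zero.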
7113 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
7114 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
7115 Bool toSys = INSN(21,21) == 0;
7116 UInt tt = INSN(4,0);
7117 if (toSys) {
7118 IRTemp t = newTemp(Ity_I64);
7119 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
7120 setFlags_COPY(t);
7121 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
7122 } else {
7123 IRTemp res = newTemp(Ity_I64);
7124 assign(res, mk_arm64g_calculate_flags_nzcv());
7125 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
7126 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
7128 return True;
7130 /* ---- Cases for DCZID_EL0 ----
7131 Don't support arbitrary reads and writes to this register. Just
7132 return the value 16, which indicates that the DC ZVA instruction
7133 is not permitted, so we don't have to emulate it.
7134 D5 3B 00 111 Rt MRS rT, dczid_el0
7136 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
7137 UInt tt = INSN(4,0);
7138 putIReg64orZR(tt, mkU64(1<<4));
7139 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
7140 return True;
7142 /* ---- Cases for CTR_EL0 ----
7143 We just handle reads, and make up a value from the D and I line
7144 sizes in the VexArchInfo we are given, and patch in the following
7145 fields that the Foundation model gives ("natively"):
7146 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
7147 D5 3B 00 001 Rt MRS rT, ctr_el0
7149 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
7150 UInt tt = INSN(4,0);
7151 /* Need to generate a value from dMinLine_lg2_szB and
7152 iMinLine_lg2_szB. The value in the register is in 32-bit
7153 units, so need to subtract 2 from the values in the
7154 VexArchInfo. We can assume that the values here are valid --
7155 disInstr_ARM64 checks them -- so there's no need to deal with
7156 out-of-range cases. */
7157 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7158 && archinfo->arm64_dMinLine_lg2_szB <= 17
7159 && archinfo->arm64_iMinLine_lg2_szB >= 2
7160 && archinfo->arm64_iMinLine_lg2_szB <= 17);
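/* Worked example (illustrative): with 64-byte D- and I-cache lines,
   dMinLine_lg2_szB == iMinLine_lg2_szB == 6, so both 4-bit fields get
   the value 6-2 == 4 (the register counts 4-byte words), giving
   val == 0x8440c000 | (4 << 16) | 4 == 0x8444c004. */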
7161 UInt val
7162 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
7163 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
7164 putIReg64orZR(tt, mkU64(val));
7165 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
7166 return True;
7168 /* ---- Cases for CNTVCT_EL0 ----
7169 This is a timestamp counter of some sort. Support reads of it only
7170 by passing through to the host.
7171 D5 3B E0 010 Rt MRS Xt, cntvct_el0
7173 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
7174 UInt tt = INSN(4,0);
7175 IRTemp val = newTemp(Ity_I64);
7176 IRExpr** args = mkIRExprVec_0();
7177 IRDirty* d = unsafeIRDirty_1_N (
7178 val,
7179 0/*regparms*/,
7180 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
7181 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
7182 args
7184 /* execute the dirty call, dumping the result in val. */
7185 stmt( IRStmt_Dirty(d) );
7186 putIReg64orZR(tt, mkexpr(val));
7187 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
7188 return True;
7190 /* ---- Cases for CNTFRQ_EL0 ----
7191 This is always RO at EL0, so it's safe to pass through to the host.
7192 D5 3B E0 000 Rt MRS Xt, cntfrq_el0
7194 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
7195 UInt tt = INSN(4,0);
7196 IRTemp val = newTemp(Ity_I64);
7197 IRExpr** args = mkIRExprVec_0();
7198 IRDirty* d = unsafeIRDirty_1_N (
7199 val,
7200 0/*regparms*/,
7201 "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
7202 &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
7203 args
7205 /* execute the dirty call, dumping the result in val. */
7206 stmt( IRStmt_Dirty(d) );
7207 putIReg64orZR(tt, mkexpr(val));
7208 DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
7209 return True;
7212 /* ------------------ IC_IVAU ------------------ */
7213 /* D5 0B 75 001 Rt ic ivau, rT
7215 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
7216 /* We will always be provided with a valid iMinLine value. */
7217 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
7218 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7219 /* Round the requested address, in rT, down to the start of the
7220 containing block. */
7221 UInt tt = INSN(4,0);
7222 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
7223 IRTemp addr = newTemp(Ity_I64);
7224 assign( addr, binop( Iop_And64,
7225 getIReg64orZR(tt),
7226 mkU64(~(lineszB - 1))) );
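/* For example, with arm64_iMinLine_lg2_szB == 6, lineszB == 64 and the
   mask is ~63, so |addr| is Xt rounded down to a 64-byte boundary. */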
7227 /* Set the invalidation range, request exit-and-invalidate, with
7228 continuation at the next instruction. */
7229 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7230 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7231 /* be paranoid ... */
7232 stmt( IRStmt_MBE(Imbe_Fence) );
7233 putPC(mkU64( guest_PC_curr_instr + 4 ));
7234 dres->whatNext = Dis_StopHere;
7235 dres->jk_StopHere = Ijk_InvalICache;
7236 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
7237 return True;
7240 /* ------------------ DC_CVAU ------------------ */
7241 /* D5 0B 7B 001 Rt dc cvau, rT
7243 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
7244 /* Exactly the same scheme as for IC IVAU, except we observe the
7245 dMinLine size, and request an Ijk_FlushDCache instead of
7246 Ijk_InvalICache. */
7247 /* We will always be provided with a valid dMinLine value. */
7248 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7249 && archinfo->arm64_dMinLine_lg2_szB <= 17);
7250 /* Round the requested address, in rT, down to the start of the
7251 containing block. */
7252 UInt tt = INSN(4,0);
7253 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
7254 IRTemp addr = newTemp(Ity_I64);
7255 assign( addr, binop( Iop_And64,
7256 getIReg64orZR(tt),
7257 mkU64(~(lineszB - 1))) );
7258 /* Set the flush range, request exit-and-flush, with
7259 continuation at the next instruction. */
7260 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7261 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7262 /* be paranoid ... */
7263 stmt( IRStmt_MBE(Imbe_Fence) );
7264 putPC(mkU64( guest_PC_curr_instr + 4 ));
7265 dres->whatNext = Dis_StopHere;
7266 dres->jk_StopHere = Ijk_FlushDCache;
7267 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
7268 return True;
7271 /* ------------------ ISB, DMB, DSB ------------------ */
7272 /* 31 21 11 7 6 4
7273 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
7274 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
7275 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
7277 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
7278 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
7279 && INSN(7,7) == 1
7280 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
7281 UInt opc = INSN(6,5);
7282 UInt CRm = INSN(11,8);
7283 vassert(opc <= 2 && CRm <= 15);
7284 stmt(IRStmt_MBE(Imbe_Fence));
7285 const HChar* opNames[3]
7286 = { "dsb", "dmb", "isb" };
7287 const HChar* howNames[16]
7288 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
7289 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
7290 DIP("%s %s\n", opNames[opc], howNames[CRm]);
7291 return True;
7294 /* -------------------- NOP -------------------- */
7295 if (INSN(31,0) == 0xD503201F) {
7296 DIP("nop\n");
7297 return True;
7300 /* -------------------- BRK -------------------- */
7301 /* 31 23 20 4
7302 1101 0100 001 imm16 00000 BRK #imm16
7304 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
7305 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
7306 UInt imm16 = INSN(20,5);
7307 /* Request SIGTRAP and then restart of this insn. */
7308 putPC(mkU64(guest_PC_curr_instr + 0));
7309 dres->whatNext = Dis_StopHere;
7310 dres->jk_StopHere = Ijk_SigTRAP;
7311 DIP("brk #%u\n", imm16);
7312 return True;
7315 /* ------------------- YIELD ------------------- */
7316 /* 31 23 15 7
7317 1101 0101 0000 0011 0010 0000 0011 1111
7319 if (INSN(31,0) == 0xD503203F) {
7320 /* Request yield followed by continuation at the next insn. */
7321 putPC(mkU64(guest_PC_curr_instr + 4));
7322 dres->whatNext = Dis_StopHere;
7323 dres->jk_StopHere = Ijk_Yield;
7324 DIP("yield\n");
7325 return True;
7328 /* -------------------- HINT ------------------- */
7329 /* 31 23 15 11 4 3
7330 1101 0101 0000 0011 0010 imm7 1 1111
7331 Catch otherwise unhandled HINT instructions - any
7332 like YIELD which are explicitly handled should go
7333 above this case.
7335 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
7336 && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
7337 && INSN(15,12) == BITS4(0,0,1,0)
7338 && INSN(4,0) == BITS5(1,1,1,1,1)) {
7339 UInt imm7 = INSN(11,5);
7340 DIP("hint #%u\n", imm7);
7341 return True;
7344 /* ------------------- CLREX ------------------ */
7345 /* 31 23 15 11 7
7346 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
7347 CRm is apparently ignored.
7349 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
7350 UInt mm = INSN(11,8);
7351 /* AFAICS, this simply cancels a (all?) reservations made by a
7352 (any?) preceding LDREX(es). Arrange to hand it through to
7353 the back end. */
7354 if (abiinfo->guest__use_fallback_LLSC) {
7355 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
7356 } else {
7357 stmt( IRStmt_MBE(Imbe_CancelReservation) );
7359 DIP("clrex #%u\n", mm);
7360 return True;
7363 vex_printf("ARM64 front end: branch_etc\n");
7364 return False;
7365 # undef INSN
7369 /*------------------------------------------------------------*/
7370 /*--- SIMD and FP instructions: helper functions ---*/
7371 /*------------------------------------------------------------*/
7373 /* Some constructors for interleave/deinterleave expressions. */
7375 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7376 // returns a0 b0
7377 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
7380 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7381 // returns a1 b1
7382 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
7385 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7386 // returns a2 a0 b2 b0
7387 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
7390 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7391 // returns a3 a1 b3 b1
7392 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
7395 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
7396 // returns a1 b1 a0 b0
7397 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
7400 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
7401 // returns a3 b3 a2 b2
7402 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
7405 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7406 // returns a6 a4 a2 a0 b6 b4 b2 b0
7407 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7410 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7411 // returns a7 a5 a3 a1 b7 b5 b3 b1
7412 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7415 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7416 // returns a3 b3 a2 b2 a1 b1 a0 b0
7417 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
7420 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7421 // returns a7 b7 a6 b6 a5 b5 a4 b4
7422 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
7425 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7426 IRTemp bFEDCBA9876543210 ) {
7427 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7428 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
7429 mkexpr(bFEDCBA9876543210));
7432 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7433 IRTemp bFEDCBA9876543210 ) {
7434 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7435 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
7436 mkexpr(bFEDCBA9876543210));
7439 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7440 IRTemp bFEDCBA9876543210 ) {
7441 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7442 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
7443 mkexpr(bFEDCBA9876543210));
7446 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7447 IRTemp bFEDCBA9876543210 ) {
7448 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7449 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
7450 mkexpr(bFEDCBA9876543210));
7453 /* Generate N copies of |bit| in the bottom of a ULong. */
7454 static ULong Replicate ( ULong bit, Int N )
7456 vassert(bit <= 1 && N >= 1 && N < 64);
7457 if (bit == 0) {
7458 return 0;
7459 } else {
7460 /* Careful. This won't work for N == 64. */
7461 return (1ULL << N) - 1;
7465 static ULong Replicate32x2 ( ULong bits32 )
7467 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
7468 return (bits32 << 32) | bits32;
7471 static ULong Replicate16x4 ( ULong bits16 )
7473 vassert(0 == (bits16 & ~0xFFFFULL));
7474 return Replicate32x2((bits16 << 16) | bits16);
7477 static ULong Replicate8x8 ( ULong bits8 )
7479 vassert(0 == (bits8 & ~0xFFULL));
7480 return Replicate16x4((bits8 << 8) | bits8);
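/* Illustrative examples of the replication helpers above:
      Replicate(1, 3)           == 0x7
      Replicate32x2(0x89ABCDEF) == 0x89ABCDEF89ABCDEFULL
      Replicate8x8(0xAB)        == 0xABABABABABABABABULL
*/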
7483 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
7484 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
7485 is 64. In the former case, the upper 32 bits of the returned value
7486 are guaranteed to be zero. */
7487 static ULong VFPExpandImm ( ULong imm8, Int N )
7489 vassert(imm8 <= 0xFF);
7490 vassert(N == 32 || N == 64);
7491 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
7492 Int F = N - E - 1;
7493 ULong imm8_6 = (imm8 >> 6) & 1;
7494 /* sign: 1 bit */
7495 /* exp: E bits */
7496 /* frac: F bits */
7497 ULong sign = (imm8 >> 7) & 1;
7498 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
7499 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
7500 vassert(sign < (1ULL << 1));
7501 vassert(exp < (1ULL << E));
7502 vassert(frac < (1ULL << F));
7503 vassert(1 + E + F == N);
7504 ULong res = (sign << (E+F)) | (exp << F) | frac;
7505 return res;
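/* Worked example: VFPExpandImm(0x70, 64).  Here sign == 0, imm8<6> == 1,
   so exp == 0b011111111 (9 bits) and frac == 0b110000 << 48, giving
   0x3FF0000000000000, i.e. the double 1.0 (imm8 == 0x70 is the FMOV
   immediate encoding of +1.0). */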
7508 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
7509 This might fail, as indicated by the returned Bool. Page 2530 of
7510 the manual. */
7511 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
7512 UInt op, UInt cmode, UInt imm8 )
7514 vassert(op <= 1);
7515 vassert(cmode <= 15);
7516 vassert(imm8 <= 255);
7518 *res = 0; /* will overwrite iff returning True */
7520 ULong imm64 = 0;
7521 Bool testimm8 = False;
7523 switch (cmode >> 1) {
7524 case 0:
7525 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7526 case 1:
7527 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7528 case 2:
7529 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7530 case 3:
7531 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7532 case 4:
7533 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7534 case 5:
7535 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7536 case 6:
7537 testimm8 = True;
7538 if ((cmode & 1) == 0)
7539 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7540 else
7541 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7542 break;
7543 case 7:
7544 testimm8 = False;
7545 if ((cmode & 1) == 0 && op == 0)
7546 imm64 = Replicate8x8(imm8);
7547 if ((cmode & 1) == 0 && op == 1) {
7548 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7549 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7550 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7551 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7552 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7553 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7554 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7555 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7557 if ((cmode & 1) == 1 && op == 0) {
7558 ULong imm8_7 = (imm8 >> 7) & 1;
7559 ULong imm8_6 = (imm8 >> 6) & 1;
7560 ULong imm8_50 = imm8 & 63;
7561 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7562 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7563 | (Replicate(imm8_6, 5) << (6 + 19))
7564 | (imm8_50 << 19);
7565 imm64 = Replicate32x2(imm32);
7567 if ((cmode & 1) == 1 && op == 1) {
7568 // imm64 = imm8<7>:NOT(imm8<6>)
7569 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7570 ULong imm8_7 = (imm8 >> 7) & 1;
7571 ULong imm8_6 = (imm8 >> 6) & 1;
7572 ULong imm8_50 = imm8 & 63;
7573 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7574 | (Replicate(imm8_6, 8) << 54)
7575 | (imm8_50 << 48);
7577 break;
7578 default:
7579 vassert(0);
7582 if (testimm8 && imm8 == 0)
7583 return False;
7585 *res = imm64;
7586 return True;
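/* Worked example (illustrative): op == 1, cmode == 0b1110, imm8 == 0xAA.
   This is the per-bit byte-mask case above: each bit of imm8 expands to a
   whole byte, so imm64 == 0xFF00FF00FF00FF00ULL. */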
7589 /* Help a bit for decoding laneage for vector operations that can be
7590 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7591 and SZ bits, typically for vector floating point. */
7592 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7593 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7594 /*OUT*/const HChar** arrSpec,
7595 Bool bitQ, Bool bitSZ )
7597 vassert(bitQ == True || bitQ == False);
7598 vassert(bitSZ == True || bitSZ == False);
7599 if (bitQ && bitSZ) { // 2x64
7600 if (tyI) *tyI = Ity_I64;
7601 if (tyF) *tyF = Ity_F64;
7602 if (nLanes) *nLanes = 2;
7603 if (zeroUpper) *zeroUpper = False;
7604 if (arrSpec) *arrSpec = "2d";
7605 return True;
7607 if (bitQ && !bitSZ) { // 4x32
7608 if (tyI) *tyI = Ity_I32;
7609 if (tyF) *tyF = Ity_F32;
7610 if (nLanes) *nLanes = 4;
7611 if (zeroUpper) *zeroUpper = False;
7612 if (arrSpec) *arrSpec = "4s";
7613 return True;
7615 if (!bitQ && !bitSZ) { // 2x32
7616 if (tyI) *tyI = Ity_I32;
7617 if (tyF) *tyF = Ity_F32;
7618 if (nLanes) *nLanes = 2;
7619 if (zeroUpper) *zeroUpper = True;
7620 if (arrSpec) *arrSpec = "2s";
7621 return True;
7623 // Else impliedly 1x64, which isn't allowed.
7624 return False;
7627 /* Helper for decoding laneage for shift-style vector operations
7628 that involve an immediate shift amount. */
7629 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7630 UInt immh, UInt immb )
7632 vassert(immh < (1<<4));
7633 vassert(immb < (1<<3));
7634 UInt immhb = (immh << 3) | immb;
7635 if (immh & 8) {
7636 if (shift) *shift = 128 - immhb;
7637 if (szBlg2) *szBlg2 = 3;
7638 return True;
7640 if (immh & 4) {
7641 if (shift) *shift = 64 - immhb;
7642 if (szBlg2) *szBlg2 = 2;
7643 return True;
7645 if (immh & 2) {
7646 if (shift) *shift = 32 - immhb;
7647 if (szBlg2) *szBlg2 = 1;
7648 return True;
7650 if (immh & 1) {
7651 if (shift) *shift = 16 - immhb;
7652 if (szBlg2) *szBlg2 = 0;
7653 return True;
7655 return False;
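/* For example, immh:immb == 0b0001:0b001 (immhb == 9) selects 8-bit lanes
   (szBlg2 == 0) with shift == 16 - 9 == 7, whereas immh:immb == 0b1000:0b000
   (immhb == 64) selects 64-bit lanes with shift == 128 - 64 == 64. */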
7658 /* Generate IR to fold all lanes of the V128 value in 'src' as
7659 characterised by the operator 'op', and return the result in the
7660 bottom bits of a V128, with all other bits set to zero. */
7661 static IRTemp math_FOLDV ( IRTemp src, IROp op )
7663 /* The basic idea is to use repeated applications of Iop_CatEven*
7664 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7665 a complete vector. Then fold all those vectors with 'op' and
7666 zero out all but the least significant lane. */
7667 switch (op) {
7668 case Iop_Min8Sx16: case Iop_Min8Ux16:
7669 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
7670 /* NB: temp naming here is misleading -- the naming is for 8
7671 lanes of 16 bit, whereas what is being operated on is 16
7672 lanes of 8 bits. */
7673 IRTemp x76543210 = src;
7674 IRTemp x76547654 = newTempV128();
7675 IRTemp x32103210 = newTempV128();
7676 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7677 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7678 IRTemp x76767676 = newTempV128();
7679 IRTemp x54545454 = newTempV128();
7680 IRTemp x32323232 = newTempV128();
7681 IRTemp x10101010 = newTempV128();
7682 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7683 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7684 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7685 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7686 IRTemp x77777777 = newTempV128();
7687 IRTemp x66666666 = newTempV128();
7688 IRTemp x55555555 = newTempV128();
7689 IRTemp x44444444 = newTempV128();
7690 IRTemp x33333333 = newTempV128();
7691 IRTemp x22222222 = newTempV128();
7692 IRTemp x11111111 = newTempV128();
7693 IRTemp x00000000 = newTempV128();
7694 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7695 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7696 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7697 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7698 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7699 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7700 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7701 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7702 /* Naming not misleading after here. */
7703 IRTemp xAllF = newTempV128();
7704 IRTemp xAllE = newTempV128();
7705 IRTemp xAllD = newTempV128();
7706 IRTemp xAllC = newTempV128();
7707 IRTemp xAllB = newTempV128();
7708 IRTemp xAllA = newTempV128();
7709 IRTemp xAll9 = newTempV128();
7710 IRTemp xAll8 = newTempV128();
7711 IRTemp xAll7 = newTempV128();
7712 IRTemp xAll6 = newTempV128();
7713 IRTemp xAll5 = newTempV128();
7714 IRTemp xAll4 = newTempV128();
7715 IRTemp xAll3 = newTempV128();
7716 IRTemp xAll2 = newTempV128();
7717 IRTemp xAll1 = newTempV128();
7718 IRTemp xAll0 = newTempV128();
7719 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7720 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7721 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7722 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7723 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7724 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7725 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7726 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7727 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7728 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7729 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7730 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7731 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7732 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7733 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7734 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
7735 IRTemp maxFE = newTempV128();
7736 IRTemp maxDC = newTempV128();
7737 IRTemp maxBA = newTempV128();
7738 IRTemp max98 = newTempV128();
7739 IRTemp max76 = newTempV128();
7740 IRTemp max54 = newTempV128();
7741 IRTemp max32 = newTempV128();
7742 IRTemp max10 = newTempV128();
7743 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7744 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7745 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7746 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7747 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7748 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7749 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7750 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
7751 IRTemp maxFEDC = newTempV128();
7752 IRTemp maxBA98 = newTempV128();
7753 IRTemp max7654 = newTempV128();
7754 IRTemp max3210 = newTempV128();
7755 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7756 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7757 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7758 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7759 IRTemp maxFEDCBA98 = newTempV128();
7760 IRTemp max76543210 = newTempV128();
7761 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7762 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7763 IRTemp maxAllLanes = newTempV128();
7764 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7765 mkexpr(max76543210)));
7766 IRTemp res = newTempV128();
7767 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7768 return res;
7770 case Iop_Min16Sx8: case Iop_Min16Ux8:
7771 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
7772 IRTemp x76543210 = src;
7773 IRTemp x76547654 = newTempV128();
7774 IRTemp x32103210 = newTempV128();
7775 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7776 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7777 IRTemp x76767676 = newTempV128();
7778 IRTemp x54545454 = newTempV128();
7779 IRTemp x32323232 = newTempV128();
7780 IRTemp x10101010 = newTempV128();
7781 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7782 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7783 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7784 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7785 IRTemp x77777777 = newTempV128();
7786 IRTemp x66666666 = newTempV128();
7787 IRTemp x55555555 = newTempV128();
7788 IRTemp x44444444 = newTempV128();
7789 IRTemp x33333333 = newTempV128();
7790 IRTemp x22222222 = newTempV128();
7791 IRTemp x11111111 = newTempV128();
7792 IRTemp x00000000 = newTempV128();
7793 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7794 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7795 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7796 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7797 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7798 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7799 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7800 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7801 IRTemp max76 = newTempV128();
7802 IRTemp max54 = newTempV128();
7803 IRTemp max32 = newTempV128();
7804 IRTemp max10 = newTempV128();
7805 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7806 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7807 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7808 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
7809 IRTemp max7654 = newTempV128();
7810 IRTemp max3210 = newTempV128();
7811 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7812 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7813 IRTemp max76543210 = newTempV128();
7814 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7815 IRTemp res = newTempV128();
7816 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7817 return res;
7819 case Iop_Max32Fx4: case Iop_Min32Fx4:
7820 case Iop_Min32Sx4: case Iop_Min32Ux4:
7821 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
7822 IRTemp x3210 = src;
7823 IRTemp x3232 = newTempV128();
7824 IRTemp x1010 = newTempV128();
7825 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7826 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
7827 IRTemp x3333 = newTempV128();
7828 IRTemp x2222 = newTempV128();
7829 IRTemp x1111 = newTempV128();
7830 IRTemp x0000 = newTempV128();
7831 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7832 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7833 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7834 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
7835 IRTemp max32 = newTempV128();
7836 IRTemp max10 = newTempV128();
7837 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7838 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
7839 IRTemp max3210 = newTempV128();
7840 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7841 IRTemp res = newTempV128();
7842 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7843 return res;
7845 case Iop_Add64x2: {
7846 IRTemp x10 = src;
7847 IRTemp x00 = newTempV128();
7848 IRTemp x11 = newTempV128();
7849 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7850 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
7851 IRTemp max10 = newTempV128();
7852 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
7853 IRTemp res = newTempV128();
7854 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7855 return res;
7857 default:
7858 vassert(0);
7863 /* Generate IR for TBL and TBX. This deals with the 128 bit case
7864 only. */
7865 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7866 IRTemp oor_values )
7868 vassert(len >= 0 && len <= 3);
7870 /* Generate some useful constants as concisely as possible. */
7871 IRTemp half15 = newTemp(Ity_I64);
7872 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7873 IRTemp half16 = newTemp(Ity_I64);
7874 assign(half16, mkU64(0x1010101010101010ULL));
7876 /* A zero vector */
7877 IRTemp allZero = newTempV128();
7878 assign(allZero, mkV128(0x0000));
7879 /* A vector containing 15 in each 8-bit lane */
7880 IRTemp all15 = newTempV128();
7881 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7882 /* A vector containing 16 in each 8-bit lane */
7883 IRTemp all16 = newTempV128();
7884 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7885 /* A vector containing 32 in each 8-bit lane */
7886 IRTemp all32 = newTempV128();
7887 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7888 /* A vector containing 48 in each 8-bit lane */
7889 IRTemp all48 = newTempV128();
7890 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7891 /* A vector containing 64 in each 8-bit lane */
7892 IRTemp all64 = newTempV128();
7893 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7895 /* Group the 16/32/48/64 vectors so as to be indexable. */
7896 IRTemp allXX[4] = { all16, all32, all48, all64 };
7898 /* Compute the result for each table vector, with zeroes in places
7899 where the index values are out of range, and OR them into the
7900 running vector. */
7901 IRTemp running_result = newTempV128();
7902 assign(running_result, mkV128(0));
7904 UInt tabent;
7905 for (tabent = 0; tabent <= len; tabent++) {
7906 vassert(tabent >= 0 && tabent < 4);
7907 IRTemp bias = newTempV128();
7908 assign(bias,
7909 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
7910 IRTemp biased_indices = newTempV128();
7911 assign(biased_indices,
7912 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
7913 IRTemp valid_mask = newTempV128();
7914 assign(valid_mask,
7915 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
7916 IRTemp safe_biased_indices = newTempV128();
7917 assign(safe_biased_indices,
7918 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
7919 IRTemp results_or_junk = newTempV128();
7920 assign(results_or_junk,
7921 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7922 mkexpr(safe_biased_indices)));
7923 IRTemp results_or_zero = newTempV128();
7924 assign(results_or_zero,
7925 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7926 /* And OR that into the running result. */
7927 IRTemp tmp = newTempV128();
7928 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7929 mkexpr(running_result)));
7930 running_result = tmp;
7933 /* So now running_result holds the overall result where the indices
7934 are in range, and zero in out-of-range lanes. Now we need to
7935 compute an overall validity mask and use this to copy in the
7936 lanes in the oor_values for out of range indices. This is
7937 unnecessary for TBL but will get folded out by iropt, so we lean
7938 on that and generate the same code for TBL and TBX here. */
7939 IRTemp overall_valid_mask = newTempV128();
7940 assign(overall_valid_mask,
7941 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
7942 IRTemp result = newTempV128();
7943 assign(result,
7944 binop(Iop_OrV128,
7945 mkexpr(running_result),
7946 binop(Iop_AndV128,
7947 mkexpr(oor_values),
7948 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7949 return result;
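/* Behavioural sketch (illustrative): with len == 0 there is one table
   register, tab[0].  A source byte of 0x03 selects byte 3 of tab[0]; a
   source byte of 0x10 or greater is out of range, so the result byte
   comes from |oor_values| instead (which the caller sets to zero for TBL
   and to the old Vd for TBX). */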
7953 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7954 an op which takes two I64s and produces a V128. That is, a widening
7955 operator. Generate IR which applies |opI64x2toV128| to either the
7956 lower (if |is2| is False) or upper (if |is2| is True) halves of
7957 |argL| and |argR|, and return the value in a new IRTemp.
7959 static
7960 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7961 IRExpr* argL, IRExpr* argR )
7963 IRTemp res = newTempV128();
7964 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7965 assign(res, binop(opI64x2toV128, unop(slice, argL),
7966 unop(slice, argR)));
7967 return res;
7971 /* Generate signed/unsigned absolute difference vector IR. */
7972 static
7973 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7975 vassert(size <= 3);
7976 IRTemp argL = newTempV128();
7977 IRTemp argR = newTempV128();
7978 IRTemp msk = newTempV128();
7979 IRTemp res = newTempV128();
7980 assign(argL, argLE);
7981 assign(argR, argRE);
7982 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
7983 mkexpr(argL), mkexpr(argR)));
7984 assign(res,
7985 binop(Iop_OrV128,
7986 binop(Iop_AndV128,
7987 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
7988 mkexpr(msk)),
7989 binop(Iop_AndV128,
7990 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
7991 unop(Iop_NotV128, mkexpr(msk)))));
7992 return res;
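/* Per-lane scalar analogue of the above (illustrative sketch):
      msk = (isU ? (a >u b) : (a >s b)) ? ~0 : 0;  // all-ones or all-zeroes
      res = ((a - b) & msk) | ((b - a) & ~msk);    // == |a - b|
   e.g. signed a == 3, b == 10: msk == 0, so res == b - a == 7. */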
7996 /* Generate IR that takes a V128 and sign- or zero-widens
7997 either the lower or upper set of lanes to twice-as-wide,
7998 resulting in a new V128 value. */
7999 static
8000 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
8001 UInt sizeNarrow, IRExpr* srcE )
8003 IRTemp src = newTempV128();
8004 IRTemp res = newTempV128();
8005 assign(src, srcE);
8006 switch (sizeNarrow) {
8007 case X10:
8008 assign(res,
8009 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
8010 binop(fromUpperHalf ? Iop_InterleaveHI32x4
8011 : Iop_InterleaveLO32x4,
8012 mkexpr(src),
8013 mkexpr(src)),
8014 mkU8(32)));
8015 break;
8016 case X01:
8017 assign(res,
8018 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
8019 binop(fromUpperHalf ? Iop_InterleaveHI16x8
8020 : Iop_InterleaveLO16x8,
8021 mkexpr(src),
8022 mkexpr(src)),
8023 mkU8(16)));
8024 break;
8025 case X00:
8026 assign(res,
8027 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
8028 binop(fromUpperHalf ? Iop_InterleaveHI8x16
8029 : Iop_InterleaveLO8x16,
8030 mkexpr(src),
8031 mkexpr(src)),
8032 mkU8(8)));
8033 break;
8034 default:
8035 vassert(0);
8037 return res;
8041 /* Generate IR that takes a V128 and sign- or zero-widens
8042 either the even or odd lanes to twice-as-wide,
8043 resulting in a new V128 value. */
8044 static
8045 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
8046 UInt sizeNarrow, IRExpr* srcE )
8048 IRTemp src = newTempV128();
8049 IRTemp res = newTempV128();
8050 IROp opSAR = mkVecSARN(sizeNarrow+1);
8051 IROp opSHR = mkVecSHRN(sizeNarrow+1);
8052 IROp opSHL = mkVecSHLN(sizeNarrow+1);
8053 IROp opSxR = zWiden ? opSHR : opSAR;
8054 UInt amt = 0;
8055 switch (sizeNarrow) {
8056 case X10: amt = 32; break;
8057 case X01: amt = 16; break;
8058 case X00: amt = 8; break;
8059 default: vassert(0);
8061 assign(src, srcE);
8062 if (fromOdd) {
8063 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
8064 } else {
8065 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
8066 mkU8(amt)));
8068 return res;
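/* For example, sizeNarrow == X01 widens 16-bit lanes inside 32-bit
   containers: odd lanes are obtained as src >> 16 (logical or arithmetic
   per |zWiden|), even lanes as (src << 16) >> 16. */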
8072 /* Generate IR that takes two V128s and narrows (takes lower half)
8073 of each lane, producing a single V128 value. */
8074 static
8075 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
8077 IRTemp res = newTempV128();
8078 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
8079 mkexpr(argHi), mkexpr(argLo)));
8080 return res;
8084 /* Return a temp which holds the vector dup of the lane of width
8085 (1 << size) obtained from src[laneNo]. */
8086 static
8087 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
8089 vassert(size <= 3);
8090 /* Normalise |laneNo| so it is of the form
8091 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
8092 This puts the bits we want to inspect at constant offsets
8093 regardless of the value of |size|.
8095 UInt ix = laneNo << size;
8096 vassert(ix <= 15);
8097 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
8098 switch (size) {
8099 case 0: /* B */
8100 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
8101 /* fallthrough */
8102 case 1: /* H */
8103 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
8104 /* fallthrough */
8105 case 2: /* S */
8106 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
8107 /* fallthrough */
8108 case 3: /* D */
8109 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
8110 break;
8111 default:
8112 vassert(0);
8114 IRTemp res = newTempV128();
8115 assign(res, src);
8116 Int i;
8117 for (i = 3; i >= 0; i--) {
8118 if (ops[i] == Iop_INVALID)
8119 break;
8120 IRTemp tmp = newTempV128();
8121 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
8122 res = tmp;
8124 return res;
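/* Worked example: size == 2 (32-bit lanes), laneNo == 3 gives ix == 12
   (0b1100).  ops[3] == InterleaveHI64x2 copies lanes {3,2} into both
   halves, then ops[2] == CatOddLanes32x4 keeps the odd 32-bit lanes,
   leaving lane 3 duplicated in all four positions. */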
8128 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
8129 selector encoded as shown below. Return a new V128 holding the
8130 selected lane from |srcV| dup'd out to V128, and also return the
8131 lane number, log2 of the lane size in bytes, and width-character via
8132 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
8133 is an invalid selector, in which case return
8134 IRTemp_INVALID, 0, 0 and '?' respectively.
8136 imm5 = xxxx1 signifies .b[xxxx]
8137 = xxx10 .h[xxx]
8138 = xx100 .s[xx]
8139 = x1000 .d[x]
8140 otherwise invalid
8142 static
8143 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
8144 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
8145 IRExpr* srcV, UInt imm5 )
8147 *laneNo = 0;
8148 *laneSzLg2 = 0;
8149 *laneCh = '?';
8151 if (imm5 & 1) {
8152 *laneNo = (imm5 >> 1) & 15;
8153 *laneSzLg2 = 0;
8154 *laneCh = 'b';
8156 else if (imm5 & 2) {
8157 *laneNo = (imm5 >> 2) & 7;
8158 *laneSzLg2 = 1;
8159 *laneCh = 'h';
8161 else if (imm5 & 4) {
8162 *laneNo = (imm5 >> 3) & 3;
8163 *laneSzLg2 = 2;
8164 *laneCh = 's';
8166 else if (imm5 & 8) {
8167 *laneNo = (imm5 >> 4) & 1;
8168 *laneSzLg2 = 3;
8169 *laneCh = 'd';
8171 else {
8172 /* invalid */
8173 return IRTemp_INVALID;
8176 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
8180 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
8181 static
8182 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
8184 IRType ty = Ity_INVALID;
8185 IRTemp rcS = IRTemp_INVALID;
8186 switch (size) {
8187 case X01:
8188 vassert(imm <= 0xFFFFULL);
8189 ty = Ity_I16;
8190 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
8191 break;
8192 case X10:
8193 vassert(imm <= 0xFFFFFFFFULL);
8194 ty = Ity_I32;
8195 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
8196 break;
8197 case X11:
8198 ty = Ity_I64;
8199 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
8200 default:
8201 vassert(0);
8203 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
8204 return rcV;
8208 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
8209 and the upper can contain any value -- it is ignored. If |is2| is False,
8210 generate IR to put |new64| in the lower half of vector reg |dd| and zero
8211 the upper half. If |is2| is True, generate IR to put |new64| in the upper
8212 half of vector reg |dd| and leave the lower half unchanged. This
8213 simulates the behaviour of the "foo/foo2" instructions in which the
8214 destination is half the width of sources, for example addhn/addhn2.
8216 static
8217 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
8219 if (is2) {
8220 /* Get the old contents of Vdd, zero the upper half, and replace
8221 it with |new64|. */
8222 IRTemp t_zero_oldLO = newTempV128();
8223 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
8224 IRTemp t_newHI_zero = newTempV128();
8225 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
8226 mkV128(0x0000)));
8227 IRTemp res = newTempV128();
8228 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
8229 mkexpr(t_newHI_zero)));
8230 putQReg128(dd, mkexpr(res));
8231 } else {
8232 /* This is simple. */
8233 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
8238 /* Compute vector SQABS at lane size |size| for |srcE|, returning
8239 the q result in |*qabs| and the normal result in |*nabs|. */
8240 static
8241 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
8242 IRExpr* srcE, UInt size )
8244 IRTemp src, mask, maskn, nsub, qsub;
8245 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
8246 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
8247 assign(src, srcE);
8248 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
8249 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
8250 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8251 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8252 assign(*nabs, binop(Iop_OrV128,
8253 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
8254 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8255 assign(*qabs, binop(Iop_OrV128,
8256 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
8257 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8261 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
8262 the q result in |*qneg| and the normal result in |*nneg|. */
8263 static
8264 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
8265 IRExpr* srcE, UInt size )
8267 IRTemp src = IRTemp_INVALID;
8268 newTempsV128_3(&src, nneg, qneg);
8269 assign(src, srcE);
8270 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8271 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8275 /* Zero all except the least significant lane of |srcE|, where |size|
8276 indicates the lane size in the usual way. */
8277 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
8279 vassert(size < 4);
8280 IRTemp t = newTempV128();
8281 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
8282 return t;
8286 /* Generate IR to compute vector widening MULL from either the lower
8287 (is2==False) or upper (is2==True) halves of vecN and vecM. The
8288 widening multiplies are unsigned when isU==True and signed when
8289 isU==False. |size| is the narrow lane size indication. Optionally,
8290 the product may be added to or subtracted from vecD, at the wide lane
8291 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
8292 is 'm' (only multiply) then the accumulate part does not happen, and
8293 |vecD| is expected to == IRTemp_INVALID.
8295 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
8296 are allowed. The result is returned in a new IRTemp, which is
8297 returned in *res. */
8298 static
8299 void math_MULL_ACC ( /*OUT*/IRTemp* res,
8300 Bool is2, Bool isU, UInt size, HChar mas,
8301 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8303 vassert(res && *res == IRTemp_INVALID);
8304 vassert(size <= 2);
8305 vassert(mas == 'm' || mas == 'a' || mas == 's');
8306 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
8307 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
8308 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
8309 : (mas == 's' ? mkVecSUB(size+1)
8310 : Iop_INVALID);
8311 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
8312 mkexpr(vecN), mkexpr(vecM));
8313 *res = newTempV128();
8314 assign(*res, mas == 'm' ? mkexpr(mul)
8315 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
8319 /* Same as math_MULL_ACC, except the multiply is signed widening,
8320 the multiplied value is then doubled, before being added to or
8321 subtracted from the accumulated value. And everything is
8322 saturated. In all cases, saturation residuals are returned
8323 via (sat1q, sat1n), and in the accumulate cases,
8324 via (sat2q, sat2n) too. All results are returned in new temporaries.
8325 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8326 so the caller can tell this has happened. */
8327 static
8328 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
8329 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8330 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
8331 Bool is2, UInt size, HChar mas,
8332 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8334 vassert(size <= 2);
8335 vassert(mas == 'm' || mas == 'a' || mas == 's');
8336 /* Compute
8337 sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
8338 sat1n = vecN.D[is2] *s vecM.D[is2] * 2
8339 IOW take either the low or high halves of vecN and vecM, signed widen,
8340 multiply, double that, and signedly saturate. Also compute the same
8341 but without saturation.
8343 vassert(sat2q && *sat2q == IRTemp_INVALID);
8344 vassert(sat2n && *sat2n == IRTemp_INVALID);
8345 newTempsV128_3(sat1q, sat1n, res);
8346 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
8347 mkexpr(vecN), mkexpr(vecM));
8348 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
8349 mkexpr(vecN), mkexpr(vecM));
8350 assign(*sat1q, mkexpr(tq));
8351 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
8353 /* If there is no accumulation, the final result is sat1q,
8354 and there's no assignment to sat2q or sat2n. */
8355 if (mas == 'm') {
8356 assign(*res, mkexpr(*sat1q));
8357 return;
8360 /* Compute
8361 sat2q = vecD +sq/-sq sat1q
8362 sat2n = vecD +/- sat1n
8363 result = sat2q
8365 newTempsV128_2(sat2q, sat2n);
8366 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
8367 mkexpr(vecD), mkexpr(*sat1q)));
8368 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
8369 mkexpr(vecD), mkexpr(*sat1n)));
8370 assign(*res, mkexpr(*sat2q));
8374 /* Generate IR for widening signed vector multiplies. The operands
8375 have their lane width signedly widened, and they are then multiplied
8376 at the wider width, returning results in two new IRTemps. */
8377 static
8378 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
8379 UInt sizeNarrow, IRTemp argL, IRTemp argR )
8381 vassert(sizeNarrow <= 2);
8382 newTempsV128_2(resHI, resLO);
8383 IRTemp argLhi = newTemp(Ity_I64);
8384 IRTemp argLlo = newTemp(Ity_I64);
8385 IRTemp argRhi = newTemp(Ity_I64);
8386 IRTemp argRlo = newTemp(Ity_I64);
8387 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
8388 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
8389 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
8390 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
8391 IROp opMulls = mkVecMULLS(sizeNarrow);
8392 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
8393 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
8397 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8398 double that, possibly add a rounding constant (R variants), and take
8399 the high half. */
8400 static
8401 void math_SQDMULH ( /*OUT*/IRTemp* res,
8402 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8403 Bool isR, UInt size, IRTemp vN, IRTemp vM )
8405 vassert(size == X01 || size == X10); /* s or h only */
8407 newTempsV128_3(res, sat1q, sat1n);
8409 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
8410 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
8412 IRTemp addWide = mkVecADD(size+1);
8414 if (isR) {
8415 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8417 Int rcShift = size == X01 ? 15 : 31;
8418 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
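/* For example, for size == X01 (16-bit lanes) the rounding constant is
   1 << 15 in each 32-bit wide lane, so each lane of sat1n is the top half
   of 2*sx(a)*sx(b) + 0x8000; with a == b == 0x4000 (Q15 value +0.5) that
   is the top half of 0x20008000, i.e. 0x2000 (+0.25). */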
8419 assign(*sat1n,
8420 binop(mkVecCATODDLANES(size),
8421 binop(addWide,
8422 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8423 mkexpr(roundConst)),
8424 binop(addWide,
8425 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
8426 mkexpr(roundConst))));
8427 } else {
8428 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8430 assign(*sat1n,
8431 binop(mkVecCATODDLANES(size),
8432 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8433 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
8436 assign(*res, mkexpr(*sat1q));
8439 /* Generate IR for SQRDMLAH and SQRDMLSH: signedly wideningly multiply,
8440 double, add a rounding constant, take the high half and accumulate. */
8441 static
8442 void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd,
8443 UInt size, IRTemp vD, IRTemp vN, IRTemp vM )
8445 vassert(size == X01 || size == X10); /* s or h only */
8447 /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */
8449 IRTemp mul, mul_nosat, dummy;
8450 mul = mul_nosat = dummy = IRTemp_INVALID;
8451 math_SQDMULH(&mul, &dummy, &mul_nosat, True/*R*/, size, vN, vM);
8453 IROp op = isAdd ? mkVecADD(size) : mkVecSUB(size);
8454 IROp qop = isAdd ? mkVecQADDS(size) : mkVecQSUBS(size);
8455 newTempsV128_2(res, res_nosat);
8456 assign(*res, binop(qop, mkexpr(vD), mkexpr(mul)));
8457 assign(*res_nosat, binop(op, mkexpr(vD), mkexpr(mul_nosat)));
8461 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
8462 a new temp in *res, and the Q difference pair in new temps in
8463 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
8464 three operations it is. */
8465 static
8466 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
8467 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
8468 IRTemp src, UInt size, UInt shift, const HChar* nm )
8470 vassert(size <= 3);
8471 UInt laneBits = 8 << size;
8472 vassert(shift < laneBits);
8473 newTempsV128_3(res, qDiff1, qDiff2);
8474 IRTemp z128 = newTempV128();
8475 assign(z128, mkV128(0x0000));
8477 /* UQSHL */
8478 if (vex_streq(nm, "uqshl")) {
8479 IROp qop = mkVecQSHLNSATUU(size);
8480 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8481 if (shift == 0) {
8482 /* No shift means no saturation. */
8483 assign(*qDiff1, mkexpr(z128));
8484 assign(*qDiff2, mkexpr(z128));
8485 } else {
8486 /* Saturation has occurred if any of the shifted-out bits are
8487 nonzero. We get the shifted-out bits by right-shifting the
8488 original value. */
8489 UInt rshift = laneBits - shift;
8490 vassert(rshift >= 1 && rshift < laneBits);
8491 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8492 assign(*qDiff2, mkexpr(z128));
8494 return;
8497 /* SQSHL */
8498 if (vex_streq(nm, "sqshl")) {
8499 IROp qop = mkVecQSHLNSATSS(size);
8500 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8501 if (shift == 0) {
8502 /* No shift means no saturation. */
8503 assign(*qDiff1, mkexpr(z128));
8504 assign(*qDiff2, mkexpr(z128));
8505 } else {
8506 /* Saturation has occurred if any of the shifted-out bits are
8507 different from the top bit of the original value. */
8508 UInt rshift = laneBits - 1 - shift;
8509 vassert(rshift >= 0 && rshift < laneBits-1);
8510 /* qDiff1 is the shifted out bits, and the top bit of the original
8511 value, preceded by zeroes. */
8512 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8513 /* qDiff2 is the top bit of the original value, cloned the
8514 correct number of times. */
8515 assign(*qDiff2, binop(mkVecSHRN(size),
8516 binop(mkVecSARN(size), mkexpr(src),
8517 mkU8(laneBits-1)),
8518 mkU8(rshift)));
8519 /* This also succeeds in comparing the top bit of the original
8520 value to itself, which is a bit stupid, but not wrong. */
8522 return;
8525 /* SQSHLU */
8526 if (vex_streq(nm, "sqshlu")) {
8527 IROp qop = mkVecQSHLNSATSU(size);
8528 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8529 if (shift == 0) {
8530 /* If there's no shift, saturation depends on the top bit
8531 of the source. */
8532 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8533 assign(*qDiff2, mkexpr(z128));
8534 } else {
8535 /* Saturation has occurred if any of the shifted-out bits are
8536 nonzero. We get the shifted-out bits by right-shifting the
8537 original value. */
8538 UInt rshift = laneBits - shift;
8539 vassert(rshift >= 1 && rshift < laneBits);
8540 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8541 assign(*qDiff2, mkexpr(z128));
8543 return;
8546 vassert(0);
8550 /* Generate IR to do SRHADD and URHADD. */
8551 static
8552 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8554 /* Generate this:
8555 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8557 vassert(size <= 3);
8558 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8559 IROp opADD = mkVecADD(size);
8560 /* The only tricky bit is to generate the correct vector 1 constant. */
8561 const ULong ones64[4]
8562 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8563 0x0000000100000001ULL, 0x0000000000000001ULL };
8564 IRTemp imm64 = newTemp(Ity_I64);
8565 assign(imm64, mkU64(ones64[size]));
8566 IRTemp vecOne = newTempV128();
8567 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8568 IRTemp scaOne = newTemp(Ity_I8);
8569 assign(scaOne, mkU8(1));
8570 IRTemp res = newTempV128();
8571 assign(res,
8572 binop(opADD,
8573 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8574 binop(opADD,
8575 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8576 binop(opSHR,
8577 binop(opADD,
8578 binop(opADD,
8579 binop(Iop_AndV128, mkexpr(aa),
8580 mkexpr(vecOne)),
8581 binop(Iop_AndV128, mkexpr(bb),
8582 mkexpr(vecOne))
8584 mkexpr(vecOne)
8586 mkexpr(scaOne)
8591 return res;
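/* This is the overflow-free form of the rounding halving add
   (A + B + 1) >> 1.  For example, with unsigned bytes A == 255, B == 254:
   (A>>1) + (B>>1) + (((A&1)+(B&1)+1)>>1) == 127 + 127 + 1 == 255, whereas
   adding first would have wrapped at 8 bits. */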
8595 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
8596 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8597 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8598 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8599 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8600 unmodified. The presence of |opZHI| means this function can be used to
8601 generate QCFLAG update code for both scalar and vector SIMD operations.
8603 static
8604 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8606 IRTemp diff = newTempV128();
8607 IRTemp oldQCFLAG = newTempV128();
8608 IRTemp newQCFLAG = newTempV128();
8609 if (opZHI == Iop_INVALID) {
8610 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8611 } else {
8612 vassert(opZHI == Iop_ZeroHI64ofV128
8613 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8614 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8616 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8617 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8618 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8622 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8623 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8624 operations. */
8625 static
8626 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8628 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8632 /* Generate IR to rearrange two vector values in a way which is useful
8633 for doing S/D add-pair etc operations. There are 3 cases:
8635 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8637 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8639 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8641 The cases are distinguished as follows:
8642 isD == True, bitQ == 1 => 2d
8643 isD == False, bitQ == 1 => 4s
8644 isD == False, bitQ == 0 => 2s
8646 static
8647 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8648 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8649 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8652 vassert(rearrL && *rearrL == IRTemp_INVALID);
8653 vassert(rearrR && *rearrR == IRTemp_INVALID);
8654 *rearrL = newTempV128();
8655 *rearrR = newTempV128();
8656 if (isD) {
8657 // 2d case
8658 vassert(bitQ == 1);
8659 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8660 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8662 else if (!isD && bitQ == 1) {
8663 // 4s case
8664 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8665 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8666 } else {
8667 // 2s case
8668 vassert(!isD && bitQ == 0);
8669 IRTemp m1n1m0n0 = newTempV128();
8670 IRTemp m0n0m1n1 = newTempV128();
8671 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8672 mkexpr(vecM), mkexpr(vecN)));
8673 assign(m0n0m1n1, triop(Iop_SliceV128,
8674 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8675 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8676 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8681 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8682 static Double two_to_the_minus ( Int n )
8684 if (n == 1) return 0.5;
8685 vassert(n >= 2 && n <= 64);
8686 Int half = n / 2;
8687 return two_to_the_minus(half) * two_to_the_minus(n - half);
8691 /* Returns 2.0 ^ n for n in 1 .. 64 */
8692 static Double two_to_the_plus ( Int n )
8694 if (n == 1) return 2.0;
8695 vassert(n >= 2 && n <= 64);
8696 Int half = n / 2;
8697 return two_to_the_plus(half) * two_to_the_plus(n - half);
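/* Both helpers split |n| roughly in half, so e.g.
   two_to_the_minus(5) == two_to_the_minus(2) * two_to_the_minus(3)
                       == 0.25 * 0.125 == 0.03125 == 2^-5.
   Every intermediate value is an exact power of two within double range,
   so the results are exact for all n in 1 .. 64. */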
8701 /*------------------------------------------------------------*/
8702 /*--- SIMD and FP instructions ---*/
8703 /*------------------------------------------------------------*/
8705 static
8706 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8708 /* 31 29 23 21 20 15 14 10 9 4
8709 0 q 101110 op2 0 m 0 imm4 0 n d
8710 Decode fields: op2
8712 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8713 if (INSN(31,31) != 0
8714 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8715 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8716 return False;
8718 UInt bitQ = INSN(30,30);
8719 UInt op2 = INSN(23,22);
8720 UInt mm = INSN(20,16);
8721 UInt imm4 = INSN(14,11);
8722 UInt nn = INSN(9,5);
8723 UInt dd = INSN(4,0);
8725 if (op2 == BITS2(0,0)) {
8726 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
8727 IRTemp sHi = newTempV128();
8728 IRTemp sLo = newTempV128();
8729 IRTemp res = newTempV128();
8730 assign(sHi, getQReg128(mm));
8731 assign(sLo, getQReg128(nn));
8732 if (bitQ == 1) {
8733 if (imm4 == 0) {
8734 assign(res, mkexpr(sLo));
8735 } else {
8736 vassert(imm4 >= 1 && imm4 <= 15);
8737 assign(res, triop(Iop_SliceV128,
8738 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
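/* Illustrative: Iop_SliceV128(sHi, sLo, imm4) yields the 16-byte window
   starting at byte |imm4| of the 32-byte pair sHi:sLo; e.g. imm4 == 3
   gives bytes 15..3 of Vn with bytes 2..0 of Vm above them, matching the
   EXT extraction from the concatenation Vm:Vn. */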
8740 putQReg128(dd, mkexpr(res));
8741 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8742 } else {
8743 if (imm4 >= 8) return False;
8744 if (imm4 == 0) {
8745 assign(res, mkexpr(sLo));
8746 } else {
8747 vassert(imm4 >= 1 && imm4 <= 7);
8748 IRTemp hi64lo64 = newTempV128();
8749 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8750 mkexpr(sHi), mkexpr(sLo)));
8751 assign(res, triop(Iop_SliceV128,
8752 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
8754 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8755 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8757 return True;
8760 return False;
8761 # undef INSN
8765 static
8766 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8768 /* 31 29 23 21 20 15 14 12 11 9 4
8769 0 q 001110 op2 0 m 0 len op 00 n d
8770 Decode fields: op2,len,op
8772 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8773 if (INSN(31,31) != 0
8774 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8775 || INSN(21,21) != 0
8776 || INSN(15,15) != 0
8777 || INSN(11,10) != BITS2(0,0)) {
8778 return False;
8780 UInt bitQ = INSN(30,30);
8781 UInt op2 = INSN(23,22);
8782 UInt mm = INSN(20,16);
8783 UInt len = INSN(14,13);
8784 UInt bitOP = INSN(12,12);
8785 UInt nn = INSN(9,5);
8786 UInt dd = INSN(4,0);
8788 if (op2 == X00) {
8789 /* -------- 00,xx,0 TBL, xx register table -------- */
8790 /* -------- 00,xx,1 TBX, xx register table -------- */
8791 /* 31 28 20 15 14 12 9 4
8792 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8793 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8794 where Ta = 16b(q=1) or 8b(q=0)
8796 Bool isTBX = bitOP == 1;
8797 /* The out-of-range values to use. */
8798 IRTemp oor_values = newTempV128();
8799 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
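/* For each index byte of Vm that points past the end of the table, TBL
   writes zero to the corresponding result byte, whereas TBX leaves the
   existing byte of Vd unchanged.  Hence the out-of-range source is
   either all zeroes or the current value of Vd. */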
8800 /* src value */
8801 IRTemp src = newTempV128();
8802 assign(src, getQReg128(mm));
8803 /* The table values */
8804 IRTemp tab[4];
8805 UInt i;
8806 for (i = 0; i <= len; i++) {
8807 vassert(i < 4);
8808 tab[i] = newTempV128();
8809 assign(tab[i], getQReg128((nn + i) % 32));
8811 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
8812 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8813 const HChar* Ta = bitQ ==1 ? "16b" : "8b";
8814 const HChar* nm = isTBX ? "tbx" : "tbl";
8815 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
8816 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8817 return True;
8820 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8821 return False;
8822 # undef INSN
8826 static
8827 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8829 /* 31 29 23 21 20 15 14 11 9 4
8830 0 q 001110 size 0 m 0 opcode 10 n d
8831 Decode fields: opcode
8833 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8834 if (INSN(31,31) != 0
8835 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8836 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8837 return False;
8839 UInt bitQ = INSN(30,30);
8840 UInt size = INSN(23,22);
8841 UInt mm = INSN(20,16);
8842 UInt opcode = INSN(14,12);
8843 UInt nn = INSN(9,5);
8844 UInt dd = INSN(4,0);
8846 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8847 /* -------- 001 UZP1 std7_std7_std7 -------- */
8848 /* -------- 101 UZP2 std7_std7_std7 -------- */
8849 if (bitQ == 0 && size == X11) return False; // implied 1d case
8850 Bool isUZP1 = opcode == BITS3(0,0,1);
8851 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
8852 : mkVecCATODDLANES(size);
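/* E.g. for the 4s case: UZP1 Vd.4s, Vn.4s, Vm.4s takes the even-numbered
   lanes of the Vn:Vm pair, i.e. [m2 m0 n2 n0] (high to low), which is
   what CatEvenLanes32x4(Vm, Vn) computes; UZP2 takes the odd-numbered
   lanes via CatOddLanes32x4.  The Q == 0 path below first packs the two
   64-bit halves side by side so the same ops can be reused. */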
8853 IRTemp preL = newTempV128();
8854 IRTemp preR = newTempV128();
8855 IRTemp res = newTempV128();
8856 if (bitQ == 0) {
8857 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8858 getQReg128(nn)));
8859 assign(preR, mkexpr(preL));
8860 } else {
8861 assign(preL, getQReg128(mm));
8862 assign(preR, getQReg128(nn));
8864 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8865 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8866 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
8867 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8868 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8869 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8870 return True;
8873 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8874 /* -------- 010 TRN1 std7_std7_std7 -------- */
8875 /* -------- 110 TRN2 std7_std7_std7 -------- */
8876 if (bitQ == 0 && size == X11) return False; // implied 1d case
8877 Bool isTRN1 = opcode == BITS3(0,1,0);
8878 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
8879 : mkVecCATODDLANES(size);
8880 IROp op2 = mkVecINTERLEAVEHI(size);
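/* The identity used here, sketched for the 4s TRN1 case: CatEvenLanes
   of (M,M) gives [m2 m0 m2 m0] and of (N,N) gives [n2 n0 n2 n0];
   InterleaveHI32x4 of those two is [m2 n2 m0 n0], which is precisely
   TRN1's result.  TRN2 works the same way using the odd lanes. */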
8881 IRTemp srcM = newTempV128();
8882 IRTemp srcN = newTempV128();
8883 IRTemp res = newTempV128();
8884 assign(srcM, getQReg128(mm));
8885 assign(srcN, getQReg128(nn));
8886 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8887 binop(op1, mkexpr(srcN), mkexpr(srcN))));
8888 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8889 const HChar* nm = isTRN1 ? "trn1" : "trn2";
8890 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8891 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8892 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8893 return True;
8896 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8897 /* -------- 011 ZIP1 std7_std7_std7 -------- */
8898 /* -------- 111 ZIP2 std7_std7_std7 -------- */
8899 if (bitQ == 0 && size == X11) return False; // implied 1d case
8900 Bool isZIP1 = opcode == BITS3(0,1,1);
8901 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
8902 : mkVecINTERLEAVEHI(size);
8903 IRTemp preL = newTempV128();
8904 IRTemp preR = newTempV128();
8905 IRTemp res = newTempV128();
8906 if (bitQ == 0 && !isZIP1) {
8907 IRTemp z128 = newTempV128();
8908 assign(z128, mkV128(0x0000));
8909 // preL = Vm shifted left 32 bits
8910 // preR = Vn shifted left 32 bits
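// For the Q == 0 ZIP2 cases only the upper 32 bits of each 64-bit source
// participate.  Shifting each source left by 32 bits (done below with
// Iop_SliceV128 over a zero vector) places those 32 bits just above
// bit 63, so the full-width INTERLEAVEHI then deposits the wanted result
// in the low 64 bits, which math_MAYBE_ZERO_HI64 keeps.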
8911 assign(preL, triop(Iop_SliceV128,
8912 getQReg128(mm), mkexpr(z128), mkU8(12)));
8913 assign(preR, triop(Iop_SliceV128,
8914 getQReg128(nn), mkexpr(z128), mkU8(12)));
8916 } else {
8917 assign(preL, getQReg128(mm));
8918 assign(preR, getQReg128(nn));
8920 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8921 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8922 const HChar* nm = isZIP1 ? "zip1" : "zip2";
8923 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8924 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8925 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8926 return True;
8929 return False;
8930 # undef INSN
8934 static
8935 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
8937 /* 31 28 23 21 16 11 9 4
8938 0 q u 01110 size 11000 opcode 10 n d
8939 Decode fields: u,size,opcode
8941 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8942 if (INSN(31,31) != 0
8943 || INSN(28,24) != BITS5(0,1,1,1,0)
8944 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
8945 return False;
8947 UInt bitQ = INSN(30,30);
8948 UInt bitU = INSN(29,29);
8949 UInt size = INSN(23,22);
8950 UInt opcode = INSN(16,12);
8951 UInt nn = INSN(9,5);
8952 UInt dd = INSN(4,0);
8954 if (opcode == BITS5(0,0,0,1,1)) {
8955 /* -------- 0,xx,00011 SADDLV -------- */
8956 /* -------- 1,xx,00011 UADDLV -------- */
8957 /* size is the narrow size */
8958 if (size == X11 || (size == X10 && bitQ == 0)) return False;
8959 Bool isU = bitU == 1;
8960 IRTemp src = newTempV128();
8961 assign(src, getQReg128(nn));
8962 /* The basic plan is to widen the lower half, and if Q = 1,
8963 the upper half too. Add them together (if Q = 1), and in
8964 either case fold with add at twice the lane width.
8966 IRExpr* widened
8967 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
8968 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
8969 if (bitQ == 1) {
8970 widened
8971 = binop(mkVecADD(size+1),
8972 widened,
8973 mkexpr(math_WIDEN_LO_OR_HI_LANES(
8974 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
8977 /* Now fold. */
8978 IRTemp tWi = newTempV128();
8979 assign(tWi, widened);
8980 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
8981 putQReg128(dd, mkexpr(res));
8982 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8983 const HChar ch = "bhsd"[size];
8984 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
8985 nameQReg128(dd), ch, nameQReg128(nn), arr);
8986 return True;
8989 UInt ix = 0;
8990 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
8991 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
8992 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
8993 /**/
8994 if (ix != 0) {
8995 /* -------- 0,xx,01010: SMAXV -------- (1) */
8996 /* -------- 1,xx,01010: UMAXV -------- (2) */
8997 /* -------- 0,xx,11010: SMINV -------- (3) */
8998 /* -------- 1,xx,11010: UMINV -------- (4) */
8999 /* -------- 0,xx,11011: ADDV -------- (5) */
9000 vassert(ix >= 1 && ix <= 5);
9001 if (size == X11) return False; // 1d,2d cases not allowed
9002 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
9003 const IROp opMAXS[3]
9004 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
9005 const IROp opMAXU[3]
9006 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
9007 const IROp opMINS[3]
9008 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
9009 const IROp opMINU[3]
9010 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
9011 const IROp opADD[3]
9012 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
9013 vassert(size < 3);
9014 IROp op = Iop_INVALID;
9015 const HChar* nm = NULL;
9016 switch (ix) {
9017 case 1: op = opMAXS[size]; nm = "smaxv"; break;
9018 case 2: op = opMAXU[size]; nm = "umaxv"; break;
9019 case 3: op = opMINS[size]; nm = "sminv"; break;
9020 case 4: op = opMINU[size]; nm = "uminv"; break;
9021 case 5: op = opADD[size]; nm = "addv"; break;
9022 default: vassert(0);
9024 vassert(op != Iop_INVALID && nm != NULL);
9025 IRTemp tN1 = newTempV128();
9026 assign(tN1, getQReg128(nn));
9027 /* If Q == 0, we're just folding lanes in the lower half of
9028 the value. In which case, copy the lower half of the
9029 source into the upper half, so we can then treat it the
9030 same as the full width case. Except for the addition case,
9031 in which we have to zero out the upper half. */
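/* The reason for the distinction: max/min are idempotent, so folding a
   value against a copy of itself leaves the per-lane result unchanged,
   whereas for ADDV a duplicated upper half would be counted twice.
   Zeroing it instead contributes nothing to the sum. */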
9032 IRTemp tN2 = newTempV128();
9033 assign(tN2, bitQ == 0
9034 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
9035 : mk_CatEvenLanes64x2(tN1,tN1))
9036 : mkexpr(tN1));
9037 IRTemp res = math_FOLDV(tN2, op);
9038 if (res == IRTemp_INVALID)
9039 return False; /* means math_FOLDV
9040 doesn't handle this case yet */
9041 putQReg128(dd, mkexpr(res));
9042 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
9043 IRType laneTy = tys[size];
9044 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9045 DIP("%s %s, %s.%s\n", nm,
9046 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
9047 return True;
9050 if ((size == X00 || size == X10)
9051 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9052 /* -------- 0,00,01100: FMAXNMV s_4s -------- */
9053 /* -------- 0,10,01100: FMINNMV s_4s -------- */
9054 /* -------- 1,00,01111: FMAXV s_4s -------- */
9055 /* -------- 1,10,01111: FMINV s_4s -------- */
9056 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9057 if (bitQ == 0) return False; // Only 4s is allowed
9058 Bool isMIN = (size & 2) == 2;
9059 Bool isNM = opcode == BITS5(0,1,1,0,0);
9060 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
9061 IRTemp src = newTempV128();
9062 assign(src, getQReg128(nn));
9063 IRTemp res = math_FOLDV(src, opMXX);
9064 putQReg128(dd, mkexpr(res));
9065 DIP("%s%sv s%u, v%u.4s\n",
9066 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
9067 return True;
9070 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9071 return False;
9072 # undef INSN
9076 static
9077 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9079 /* 31 28 20 15 14 10 9 4
9080 0 q op 01110000 imm5 0 imm4 1 n d
9081 Decode fields: q,op,imm4
9083 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9084 if (INSN(31,31) != 0
9085 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
9086 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9087 return False;
9089 UInt bitQ = INSN(30,30);
9090 UInt bitOP = INSN(29,29);
9091 UInt imm5 = INSN(20,16);
9092 UInt imm4 = INSN(14,11);
9093 UInt nn = INSN(9,5);
9094 UInt dd = INSN(4,0);
9096 /* -------- x,0,0000: DUP (element, vector) -------- */
9097 /* 31 28 20 15 9 4
9098 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
9100 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9101 UInt laneNo = 0;
9102 UInt laneSzLg2 = 0;
9103 HChar laneCh = '?';
9104 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
9105 getQReg128(nn), imm5);
9106 if (res == IRTemp_INVALID)
9107 return False;
9108 if (bitQ == 0 && laneSzLg2 == X11)
9109 return False; /* .1d case */
9110 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9111 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
9112 DIP("dup %s.%s, %s.%c[%u]\n",
9113 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
9114 return True;
9117 /* -------- x,0,0001: DUP (general, vector) -------- */
9118 /* 31 28 20 15 9 4
9119 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
9120 Q=0 writes 64, Q=1 writes 128
9121 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
9122 xxx10 4H(q=0) or 8H(q=1), R=W
9123 xx100 2S(q=0) or 4S(q=1), R=W
9124 x1000 Invalid(q=0) or 2D(q=1), R=X
9125 x0000 Invalid(q=0) or Invalid(q=1)
9126 Require op=0, imm4=0001
9128 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
9129 Bool isQ = bitQ == 1;
9130 IRTemp w0 = newTemp(Ity_I64);
9131 const HChar* arT = "??";
9132 IRType laneTy = Ity_INVALID;
9133 if (imm5 & 1) {
9134 arT = isQ ? "16b" : "8b";
9135 laneTy = Ity_I8;
9136 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
9138 else if (imm5 & 2) {
9139 arT = isQ ? "8h" : "4h";
9140 laneTy = Ity_I16;
9141 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
9143 else if (imm5 & 4) {
9144 arT = isQ ? "4s" : "2s";
9145 laneTy = Ity_I32;
9146 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
9148 else if ((imm5 & 8) && isQ) {
9149 arT = "2d";
9150 laneTy = Ity_I64;
9151 assign(w0, getIReg64orZR(nn));
9153 else {
9154 /* invalid; leave laneTy unchanged. */
9156 /* */
9157 if (laneTy != Ity_INVALID) {
9158 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
9159 putQReg128(dd, binop(Iop_64HLtoV128,
9160 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
9161 DIP("dup %s.%s, %s\n",
9162 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
9163 return True;
9165 /* invalid */
9166 return False;
9169 /* -------- 1,0,0011: INS (general) -------- */
9170 /* 31 28 20 15 9 4
9171 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
9172 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
9173 xxx10 -> H, xxx
9174 xx100 -> S, xx
9175 x1000 -> D, x
9177 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
9178 HChar ts = '?';
9179 UInt laneNo = 16;
9180 IRExpr* src = NULL;
9181 if (imm5 & 1) {
9182 src = unop(Iop_64to8, getIReg64orZR(nn));
9183 laneNo = (imm5 >> 1) & 15;
9184 ts = 'b';
9186 else if (imm5 & 2) {
9187 src = unop(Iop_64to16, getIReg64orZR(nn));
9188 laneNo = (imm5 >> 2) & 7;
9189 ts = 'h';
9191 else if (imm5 & 4) {
9192 src = unop(Iop_64to32, getIReg64orZR(nn));
9193 laneNo = (imm5 >> 3) & 3;
9194 ts = 's';
9196 else if (imm5 & 8) {
9197 src = getIReg64orZR(nn);
9198 laneNo = (imm5 >> 4) & 1;
9199 ts = 'd';
9201 /* */
9202 if (src) {
9203 vassert(laneNo < 16);
9204 putQRegLane(dd, laneNo, src);
9205 DIP("ins %s.%c[%u], %s\n",
9206 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
9207 return True;
9209 /* invalid */
9210 return False;
9213 /* -------- x,0,0101: SMOV -------- */
9214 /* -------- x,0,0111: UMOV -------- */
9215 /* 31 28 20 15 9 4
9216 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
9217 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
9218 dest is Xd when q==1, Wd when q==0
9219 UMOV:
9220 Ts,index,ops = case q:imm5 of
9221 0:xxxx1 -> B, xxxx, 8Uto64
9222 1:xxxx1 -> invalid
9223 0:xxx10 -> H, xxx, 16Uto64
9224 1:xxx10 -> invalid
9225 0:xx100 -> S, xx, 32Uto64
9226 1:xx100 -> invalid
9227 1:x1000 -> D, x, copy64
9228 other -> invalid
9229 SMOV:
9230 Ts,index,ops = case q:imm5 of
9231 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
9232 1:xxxx1 -> B, xxxx, 8Sto64
9233 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
9234 1:xxx10 -> H, xxx, 16Sto64
9235 0:xx100 -> invalid
9236 1:xx100 -> S, xx, 32Sto64
9237 1:x1000 -> invalid
9238 other -> invalid
9240 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
9241 Bool isU = (imm4 & 2) == 2;
9242 const HChar* arTs = "??";
9243 UInt laneNo = 16; /* invalid */
9244 // Setting 'res' to non-NULL determines valid/invalid
9245 IRExpr* res = NULL;
9246 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
9247 laneNo = (imm5 >> 1) & 15;
9248 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9249 res = isU ? unop(Iop_8Uto64, lane)
9250 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
9251 arTs = "b";
9253 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
9254 laneNo = (imm5 >> 1) & 15;
9255 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9256 res = isU ? NULL
9257 : unop(Iop_8Sto64, lane);
9258 arTs = "b";
9260 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
9261 laneNo = (imm5 >> 2) & 7;
9262 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9263 res = isU ? unop(Iop_16Uto64, lane)
9264 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
9265 arTs = "h";
9267 else if (bitQ && (imm5 & 2)) { // 1:xxx10
9268 laneNo = (imm5 >> 2) & 7;
9269 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9270 res = isU ? NULL
9271 : unop(Iop_16Sto64, lane);
9272 arTs = "h";
9274 else if (!bitQ && (imm5 & 4)) { // 0:xx100
9275 laneNo = (imm5 >> 3) & 3;
9276 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9277 res = isU ? unop(Iop_32Uto64, lane)
9278 : NULL;
9279 arTs = "s";
9281 else if (bitQ && (imm5 & 4)) { // 1:xx100
9282 laneNo = (imm5 >> 3) & 3;
9283 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9284 res = isU ? NULL
9285 : unop(Iop_32Sto64, lane);
9286 arTs = "s";
9288 else if (bitQ && (imm5 & 8)) { // 1:x1000
9289 laneNo = (imm5 >> 4) & 1;
9290 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
9291 res = isU ? lane
9292 : NULL;
9293 arTs = "d";
9295 /* */
9296 if (res) {
9297 vassert(laneNo < 16);
9298 putIReg64orZR(dd, res);
9299 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
9300 nameIRegOrZR(bitQ == 1, dd),
9301 nameQReg128(nn), arTs, laneNo);
9302 return True;
9304 /* invalid */
9305 return False;
9308 /* -------- 1,1,xxxx: INS (element) -------- */
9309 /* 31 28 20 14 9 4
9310 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
9311 where Ts,ix1,ix2
9312 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
9313 xxx10 -> H, xxx, imm4[3:1]
9314 xx100 -> S, xx, imm4[3:2]
9315 x1000 -> D, x, imm4[3:3]
9317 if (bitQ == 1 && bitOP == 1) {
9318 HChar ts = '?';
9319 IRType ity = Ity_INVALID;
9320 UInt ix1 = 16;
9321 UInt ix2 = 16;
9322 if (imm5 & 1) {
9323 ts = 'b';
9324 ity = Ity_I8;
9325 ix1 = (imm5 >> 1) & 15;
9326 ix2 = (imm4 >> 0) & 15;
9328 else if (imm5 & 2) {
9329 ts = 'h';
9330 ity = Ity_I16;
9331 ix1 = (imm5 >> 2) & 7;
9332 ix2 = (imm4 >> 1) & 7;
9334 else if (imm5 & 4) {
9335 ts = 's';
9336 ity = Ity_I32;
9337 ix1 = (imm5 >> 3) & 3;
9338 ix2 = (imm4 >> 2) & 3;
9340 else if (imm5 & 8) {
9341 ts = 'd';
9342 ity = Ity_I64;
9343 ix1 = (imm5 >> 4) & 1;
9344 ix2 = (imm4 >> 3) & 1;
9346 /* */
9347 if (ity != Ity_INVALID) {
9348 vassert(ix1 < 16);
9349 vassert(ix2 < 16);
9350 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
9351 DIP("ins %s.%c[%u], %s.%c[%u]\n",
9352 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
9353 return True;
9355 /* invalid */
9356 return False;
9359 return False;
9360 # undef INSN
9364 static
9365 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
9367 /* 31 28 18 15 11 9 4
9368 0q op 01111 00000 abc cmode 01 defgh d
9369 Decode fields: q,op,cmode
9370 Bit 11 is really "o2", but it is always zero.
9372 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9373 if (INSN(31,31) != 0
9374 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
9375 || INSN(11,10) != BITS2(0,1)) {
9376 return False;
9378 UInt bitQ = INSN(30,30);
9379 UInt bitOP = INSN(29,29);
9380 UInt cmode = INSN(15,12);
9381 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
9382 UInt dd = INSN(4,0);
9384 ULong imm64lo = 0;
9385 UInt op_cmode = (bitOP << 4) | cmode;
9386 Bool ok = False;
9387 Bool isORR = False;
9388 Bool isBIC = False;
9389 Bool isMOV = False;
9390 Bool isMVN = False;
9391 Bool isFMOV = False;
9392 switch (op_cmode) {
9393 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
9394 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
9395 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9396 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9397 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9398 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9399 ok = True; isMOV = True; break;
9401 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9402 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9403 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9404 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9405 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9406 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9407 ok = True; isORR = True; break;
9409 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9410 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9411 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9412 ok = True; isMOV = True; break;
9414 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9415 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9416 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9417 ok = True; isORR = True; break;
9419 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9420 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9421 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9422 ok = True; isMOV = True; break;
9424 /* -------- x,0,1110 MOVI 8-bit -------- */
9425 case BITS5(0,1,1,1,0):
9426 ok = True; isMOV = True; break;
9428 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9429 case BITS5(0,1,1,1,1): // 0:1111
9430 ok = True; isFMOV = True; break;
9432 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9433 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
9434 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
9435 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
9436 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9437 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9438 ok = True; isMVN = True; break;
9440 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9441 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9442 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9443 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9444 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9445 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9446 ok = True; isBIC = True; break;
9448 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9449 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9450 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
9451 ok = True; isMVN = True; break;
9453 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
9454 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
9455 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
9456 ok = True; isBIC = True; break;
9458 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
9459 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
9460 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
9461 ok = True; isMVN = True; break;
9463 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
9464 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
9465 case BITS5(1,1,1,1,0):
9466 ok = True; isMOV = True; break;
9468 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
9469 case BITS5(1,1,1,1,1): // 1:1111
9470 ok = bitQ == 1; isFMOV = True; break;
9472 default:
9473 break;
9475 if (ok) {
9476 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
9477 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
9478 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
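/* AdvSIMDExpandImm follows the ARM ARM pseudocode of the same name.
   For instance (illustrative): op=0, cmode=0xx0 replicates abcdefgh,
   shifted left by 0/8/16/24 bits, into each 32-bit chunk of imm64lo,
   while op=1, cmode=1110 expands each of the 8 bits a..h to a whole
   byte of 0x00 or 0xFF. */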
9480 if (ok) {
9481 if (isORR || isBIC) {
9482 ULong inv
9483 = isORR ? 0ULL : ~0ULL;
9484 IRExpr* immV128
9485 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
9486 IRExpr* res
9487 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
9488 const HChar* nm = isORR ? "orr" : "bic";
9489 if (bitQ == 0) {
9490 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
9491 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
9492 } else {
9493 putQReg128(dd, res);
9494 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
9495 nameQReg128(dd), imm64lo, imm64lo);
9498 else if (isMOV || isMVN || isFMOV) {
9499 if (isMVN) imm64lo = ~imm64lo;
9500 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
9501 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
9502 mkU64(imm64lo));
9503 putQReg128(dd, immV128);
9504 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
9506 return True;
9508 /* else fall through */
9510 return False;
9511 # undef INSN
9515 static
9516 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9518 /* 31 28 20 15 14 10 9 4
9519 01 op 11110000 imm5 0 imm4 1 n d
9520 Decode fields: op,imm4
9522 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9523 if (INSN(31,30) != BITS2(0,1)
9524 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9525 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9526 return False;
9528 UInt bitOP = INSN(29,29);
9529 UInt imm5 = INSN(20,16);
9530 UInt imm4 = INSN(14,11);
9531 UInt nn = INSN(9,5);
9532 UInt dd = INSN(4,0);
9534 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9535 /* -------- 0,0000 DUP (element, scalar) -------- */
9536 IRTemp w0 = newTemp(Ity_I64);
9537 const HChar* arTs = "??";
9538 IRType laneTy = Ity_INVALID;
9539 UInt laneNo = 16; /* invalid */
9540 if (imm5 & 1) {
9541 arTs = "b";
9542 laneNo = (imm5 >> 1) & 15;
9543 laneTy = Ity_I8;
9544 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9546 else if (imm5 & 2) {
9547 arTs = "h";
9548 laneNo = (imm5 >> 2) & 7;
9549 laneTy = Ity_I16;
9550 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9552 else if (imm5 & 4) {
9553 arTs = "s";
9554 laneNo = (imm5 >> 3) & 3;
9555 laneTy = Ity_I32;
9556 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9558 else if (imm5 & 8) {
9559 arTs = "d";
9560 laneNo = (imm5 >> 4) & 1;
9561 laneTy = Ity_I64;
9562 assign(w0, getQRegLane(nn, laneNo, laneTy));
9564 else {
9565 /* invalid; leave laneTy unchanged. */
9567 /* */
9568 if (laneTy != Ity_INVALID) {
9569 vassert(laneNo < 16);
9570 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9571 DIP("dup %s, %s.%s[%u]\n",
9572 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9573 return True;
9575 /* else fall through */
9578 return False;
9579 # undef INSN
9583 static
9584 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9586 /* 31 28 23 21 16 11 9 4
9587 01 u 11110 sz 11000 opcode 10 n d
9588 Decode fields: u,sz,opcode
9590 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9591 if (INSN(31,30) != BITS2(0,1)
9592 || INSN(28,24) != BITS5(1,1,1,1,0)
9593 || INSN(21,17) != BITS5(1,1,0,0,0)
9594 || INSN(11,10) != BITS2(1,0)) {
9595 return False;
9597 UInt bitU = INSN(29,29);
9598 UInt sz = INSN(23,22);
9599 UInt opcode = INSN(16,12);
9600 UInt nn = INSN(9,5);
9601 UInt dd = INSN(4,0);
9603 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9604 /* -------- 0,11,11011 ADDP d_2d -------- */
9605 IRTemp xy = newTempV128();
9606 IRTemp xx = newTempV128();
9607 assign(xy, getQReg128(nn));
9608 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9609 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9610 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9611 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9612 return True;
9615 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9616 /* -------- 1,00,01101 FADDP s_2s -------- */
9617 /* -------- 1,01,01101 FADDP d_2d -------- */
9618 Bool isD = sz == X01;
9619 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9620 IROp opADD = mkVecADDF(isD ? 3 : 2);
9621 IRTemp src = newTempV128();
9622 IRTemp argL = newTempV128();
9623 IRTemp argR = newTempV128();
9624 assign(src, getQReg128(nn));
9625 assign(argL, unop(opZHI, mkexpr(src)));
9626 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9627 mkU8(isD ? 8 : 4))));
9628 putQReg128(dd, unop(opZHI,
9629 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9630 mkexpr(argL), mkexpr(argR))));
9631 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9632 return True;
9635 if (bitU == 1
9636 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9637 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9638 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9639 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9640 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9641 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9642 Bool isD = (sz & 1) == 1;
9643 Bool isMIN = (sz & 2) == 2;
9644 Bool isNM = opcode == BITS5(0,1,1,0,0);
9645 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9646 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9647 IRTemp src = newTempV128();
9648 IRTemp argL = newTempV128();
9649 IRTemp argR = newTempV128();
9650 assign(src, getQReg128(nn));
9651 assign(argL, unop(opZHI, mkexpr(src)));
9652 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9653 mkU8(isD ? 8 : 4))));
9654 putQReg128(dd, unop(opZHI,
9655 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9656 HChar c = isD ? 'd' : 's';
9657 DIP("%s%sp %c%u, v%u.2%c\n",
9658 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9659 return True;
9662 return False;
9663 # undef INSN
9667 static
9668 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9670 /* 31 28 22 18 15 10 9 4
9671 01 u 111110 immh immb opcode 1 n d
9672 Decode fields: u,immh,opcode
9674 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9675 if (INSN(31,30) != BITS2(0,1)
9676 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9677 return False;
9679 UInt bitU = INSN(29,29);
9680 UInt immh = INSN(22,19);
9681 UInt immb = INSN(18,16);
9682 UInt opcode = INSN(15,11);
9683 UInt nn = INSN(9,5);
9684 UInt dd = INSN(4,0);
9685 UInt immhb = (immh << 3) | immb;
9687 if ((immh & 8) == 8
9688 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9689 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9690 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9691 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9692 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9693 Bool isU = bitU == 1;
9694 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9695 UInt sh = 128 - immhb;
9696 vassert(sh >= 1 && sh <= 64);
9697 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9698 IRExpr* src = getQReg128(nn);
9699 IRTemp shf = newTempV128();
9700 IRTemp res = newTempV128();
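/* The IR shift ops only accept amounts 0..63.  USHR #64 always yields
   zero, so it is special-cased; SSHR #64 gives the same result as
   SSHR #63 (every bit becomes a copy of the sign bit), which is what
   the 'nudge' below arranges. */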
9701 if (sh == 64 && isU) {
9702 assign(shf, mkV128(0x0000));
9703 } else {
9704 UInt nudge = 0;
9705 if (sh == 64) {
9706 vassert(!isU);
9707 nudge = 1;
9709 assign(shf, binop(op, src, mkU8(sh - nudge)));
9711 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9712 : mkexpr(shf));
9713 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9714 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9715 : (isU ? "ushr" : "sshr");
9716 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9717 return True;
9720 if ((immh & 8) == 8
9721 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9722 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9723 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9724 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9725 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9726 Bool isU = bitU == 1;
9727 Bool isAcc = opcode == BITS5(0,0,1,1,0);
9728 UInt sh = 128 - immhb;
9729 vassert(sh >= 1 && sh <= 64);
9730 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9731 vassert(sh >= 1 && sh <= 64);
9732 IRExpr* src = getQReg128(nn);
9733 IRTemp imm8 = newTemp(Ity_I8);
9734 assign(imm8, mkU8((UChar)(-sh)));
9735 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
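/* The Rsh ops take a per-lane signed shift amount (modelled on ARM's
   SRSHL/URSHL), with negative values meaning a rounding shift right;
   hence -sh is broadcast across every byte lane of the amount vector. */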
9736 IRTemp shf = newTempV128();
9737 IRTemp res = newTempV128();
9738 assign(shf, binop(op, src, amt));
9739 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9740 : mkexpr(shf));
9741 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9742 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9743 : (isU ? "urshr" : "srshr");
9744 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9745 return True;
9748 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9749 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9750 UInt sh = 128 - immhb;
9751 vassert(sh >= 1 && sh <= 64);
9752 if (sh == 64) {
9753 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9754 } else {
9755 /* sh is in range 1 .. 63 */
9756 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
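/* The arithmetic right shift produces a mask of the top 'sh' bits, the
   bits of Dd that SRI must preserve.  E.g. sh == 8 gives
   nmask == 0xFF00000000000000, so the result keeps Dd's top 8 bits and
   takes the remaining 56 bits from Dn >> 8. */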
9757 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9758 IRTemp res = newTempV128();
9759 assign(res, binop(Iop_OrV128,
9760 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9761 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9762 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9764 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9765 return True;
9768 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9769 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9770 UInt sh = immhb - 64;
9771 vassert(sh >= 0 && sh < 64);
9772 putQReg128(dd,
9773 unop(Iop_ZeroHI64ofV128,
9774 sh == 0 ? getQReg128(nn)
9775 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9776 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9777 return True;
9780 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9781 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9782 UInt sh = immhb - 64;
9783 vassert(sh >= 0 && sh < 64);
9784 if (sh == 0) {
9785 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9786 } else {
9787 /* sh is in range 1 .. 63 */
9788 ULong nmask = (1ULL << sh) - 1;
9789 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9790 IRTemp res = newTempV128();
9791 assign(res, binop(Iop_OrV128,
9792 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9793 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9794 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9796 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9797 return True;
9800 if (opcode == BITS5(0,1,1,1,0)
9801 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9802 /* -------- 0,01110 SQSHL #imm -------- */
9803 /* -------- 1,01110 UQSHL #imm -------- */
9804 /* -------- 1,01100 SQSHLU #imm -------- */
9805 UInt size = 0;
9806 UInt shift = 0;
9807 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9808 if (!ok) return False;
9809 vassert(size >= 0 && size <= 3);
9810 /* The shift encoding has opposite sign for the leftwards case.
9811 Adjust shift to compensate. */
9812 UInt lanebits = 8 << size;
9813 shift = lanebits - shift;
9814 vassert(shift >= 0 && shift < lanebits);
9815 const HChar* nm = NULL;
9816 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9817 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9818 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9819 else vassert(0);
9820 IRTemp qDiff1 = IRTemp_INVALID;
9821 IRTemp qDiff2 = IRTemp_INVALID;
9822 IRTemp res = IRTemp_INVALID;
9823 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9824 /* This relies on the fact that the zeroed out lanes generate zeroed
9825 result lanes and don't saturate, so there's no point in trimming
9826 the resulting res, qDiff1 or qDiff2 values. */
9827 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9828 putQReg128(dd, mkexpr(res));
9829 updateQCFLAGwithDifference(qDiff1, qDiff2);
9830 const HChar arr = "bhsd"[size];
9831 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9832 return True;
9835 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9836 || (bitU == 1
9837 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9838 /* -------- 0,10010 SQSHRN #imm -------- */
9839 /* -------- 1,10010 UQSHRN #imm -------- */
9840 /* -------- 0,10011 SQRSHRN #imm -------- */
9841 /* -------- 1,10011 UQRSHRN #imm -------- */
9842 /* -------- 1,10000 SQSHRUN #imm -------- */
9843 /* -------- 1,10001 SQRSHRUN #imm -------- */
9844 UInt size = 0;
9845 UInt shift = 0;
9846 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9847 if (!ok || size == X11) return False;
9848 vassert(size >= X00 && size <= X10);
9849 vassert(shift >= 1 && shift <= (8 << size));
9850 const HChar* nm = "??";
9851 IROp op = Iop_INVALID;
9852 /* Decide on the name and the operation. */
9853 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9854 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9856 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9857 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9859 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9860 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9862 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9863 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9865 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9866 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9868 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9869 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9871 else vassert(0);
9872 /* Compute the result (Q, shifted value) pair. */
9873 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9874 IRTemp pair = newTempV128();
9875 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
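/* These QAND..NARROW ops (as used here) produce a 128-bit pair: the
   narrowed, saturated result sits in the lower 64 bits, and the upper
   64 bits are nonzero iff any lane saturated.  The code below writes
   the lower half to Vd and folds the upper half into QC. */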
9876 /* Update the result reg */
9877 IRTemp res64in128 = newTempV128();
9878 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9879 putQReg128(dd, mkexpr(res64in128));
9880 /* Update the Q flag. */
9881 IRTemp q64q64 = newTempV128();
9882 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9883 IRTemp z128 = newTempV128();
9884 assign(z128, mkV128(0x0000));
9885 updateQCFLAGwithDifference(q64q64, z128);
9886 /* */
9887 const HChar arrNarrow = "bhsd"[size];
9888 const HChar arrWide = "bhsd"[size+1];
9889 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9890 return True;
9893 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9894 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9895 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
9896 UInt size = 0;
9897 UInt fbits = 0;
9898 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9899 /* The following holds because immh is never zero. */
9900 vassert(ok);
9901 /* The following holds because immh >= 0100. */
9902 vassert(size == X10 || size == X11);
9903 Bool isD = size == X11;
9904 Bool isU = bitU == 1;
9905 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9906 Double scale = two_to_the_minus(fbits);
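/* Fixed-point convert: the integer is converted to FP and then scaled
   by 2^-fbits.  E.g. scvtf s0, s1, #8 turns the integer 0x180 (384)
   into 384 * 2^-8 = 1.5. */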
9907 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9908 : IRExpr_Const(IRConst_F32( (Float)scale ));
9909 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9910 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9911 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9912 IRType tyF = isD ? Ity_F64 : Ity_F32;
9913 IRType tyI = isD ? Ity_I64 : Ity_I32;
9914 IRTemp src = newTemp(tyI);
9915 IRTemp res = newTemp(tyF);
9916 IRTemp rm = mk_get_IR_rounding_mode();
9917 assign(src, getQRegLane(nn, 0, tyI));
9918 assign(res, triop(opMUL, mkexpr(rm),
9919 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9920 putQRegLane(dd, 0, mkexpr(res));
9921 if (!isD) {
9922 putQRegLane(dd, 1, mkU32(0));
9924 putQRegLane(dd, 1, mkU64(0));
9925 const HChar ch = isD ? 'd' : 's';
9926 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9927 ch, dd, ch, nn, fbits);
9928 return True;
9931 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9932 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9933 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
9934 UInt size = 0;
9935 UInt fbits = 0;
9936 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9937 /* The following holds because immh is never zero. */
9938 vassert(ok);
9939 /* The following holds because immh >= 0100. */
9940 vassert(size == X10 || size == X11);
9941 Bool isD = size == X11;
9942 Bool isU = bitU == 1;
9943 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9944 Double scale = two_to_the_plus(fbits);
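/* The inverse of the SCVTF/UCVTF case above: scale by 2^fbits first and
   then convert to integer with round-towards-zero, so e.g. 1.5 with
   #fbits == 8 becomes 384 (0x180). */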
9945 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9946 : IRExpr_Const(IRConst_F32( (Float)scale ));
9947 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9948 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9949 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9950 IRType tyF = isD ? Ity_F64 : Ity_F32;
9951 IRType tyI = isD ? Ity_I64 : Ity_I32;
9952 IRTemp src = newTemp(tyF);
9953 IRTemp res = newTemp(tyI);
9954 IRTemp rm = newTemp(Ity_I32);
9955 assign(src, getQRegLane(nn, 0, tyF));
9956 assign(rm, mkU32(Irrm_ZERO));
9957 assign(res, binop(opCVT, mkexpr(rm),
9958 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9959 putQRegLane(dd, 0, mkexpr(res));
9960 if (!isD) {
9961 putQRegLane(dd, 1, mkU32(0));
9963 putQRegLane(dd, 1, mkU64(0));
9964 const HChar ch = isD ? 'd' : 's';
9965 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9966 ch, dd, ch, nn, fbits);
9967 return True;
9970 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9971 return False;
9972 # undef INSN
9976 static
9977 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9979 /* 31 29 28 23 21 20 15 11 9 4
9980 01 U 11110 size 1 m opcode 00 n d
9981 Decode fields: u,opcode
9983 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9984 if (INSN(31,30) != BITS2(0,1)
9985 || INSN(28,24) != BITS5(1,1,1,1,0)
9986 || INSN(21,21) != 1
9987 || INSN(11,10) != BITS2(0,0)) {
9988 return False;
9990 UInt bitU = INSN(29,29);
9991 UInt size = INSN(23,22);
9992 UInt mm = INSN(20,16);
9993 UInt opcode = INSN(15,12);
9994 UInt nn = INSN(9,5);
9995 UInt dd = INSN(4,0);
9996 vassert(size < 4);
9998 if (bitU == 0
9999 && (opcode == BITS4(1,1,0,1)
10000 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
10001 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
10002 /* -------- 0,1001 SQDMLAL -------- */ // 1
10003 /* -------- 0,1011 SQDMLSL -------- */ // 2
10004 /* Widens, and size refers to the narrowed lanes. */
10005 UInt ks = 3;
10006 switch (opcode) {
10007 case BITS4(1,1,0,1): ks = 0; break;
10008 case BITS4(1,0,0,1): ks = 1; break;
10009 case BITS4(1,0,1,1): ks = 2; break;
10010 default: vassert(0);
10012 vassert(ks >= 0 && ks <= 2);
10013 if (size == X00 || size == X11) return False;
10014 vassert(size <= 2);
10015 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
10016 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10017 newTempsV128_3(&vecN, &vecM, &vecD);
10018 assign(vecN, getQReg128(nn));
10019 assign(vecM, getQReg128(mm));
10020 assign(vecD, getQReg128(dd));
10021 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10022 False/*!is2*/, size, "mas"[ks],
10023 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10024 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10025 putQReg128(dd, unop(opZHI, mkexpr(res)));
10026 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10027 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10028 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10029 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10031 const HChar* nm = ks == 0 ? "sqdmull"
10032 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10033 const HChar arrNarrow = "bhsd"[size];
10034 const HChar arrWide = "bhsd"[size+1];
10035 DIP("%s %c%u, %c%u, %c%u\n",
10036 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
10037 return True;
10040 return False;
10041 # undef INSN
10045 static
10046 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
10048 /* 31 29 28 23 21 20 15 10 9 4
10049 01 U 11110 size 1 m opcode 1 n d
10050 Decode fields: u,size,opcode
10052 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10053 if (INSN(31,30) != BITS2(0,1)
10054 || INSN(28,24) != BITS5(1,1,1,1,0)
10055 || INSN(21,21) != 1
10056 || INSN(10,10) != 1) {
10057 return False;
10059 UInt bitU = INSN(29,29);
10060 UInt size = INSN(23,22);
10061 UInt mm = INSN(20,16);
10062 UInt opcode = INSN(15,11);
10063 UInt nn = INSN(9,5);
10064 UInt dd = INSN(4,0);
10065 vassert(size < 4);
10067 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
10068 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
10069 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
10070 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
10071 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
10072 Bool isADD = opcode == BITS5(0,0,0,0,1);
10073 Bool isU = bitU == 1;
10074 IROp qop = Iop_INVALID;
10075 IROp nop = Iop_INVALID;
10076 if (isADD) {
10077 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
10078 nop = mkVecADD(size);
10079 } else {
10080 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
10081 nop = mkVecSUB(size);
10083 IRTemp argL = newTempV128();
10084 IRTemp argR = newTempV128();
10085 IRTemp qres = newTempV128();
10086 IRTemp nres = newTempV128();
10087 assign(argL, getQReg128(nn));
10088 assign(argR, getQReg128(mm));
10089 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10090 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
10091 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10092 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
10093 putQReg128(dd, mkexpr(qres));
10094 updateQCFLAGwithDifference(qres, nres);
10095 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
10096 : (isU ? "uqsub" : "sqsub");
10097 const HChar arr = "bhsd"[size];
10098 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10099 return True;
10102 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
10103 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
10104 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
10105 Bool isGT = bitU == 0;
10106 IRExpr* argL = getQReg128(nn);
10107 IRExpr* argR = getQReg128(mm);
10108 IRTemp res = newTempV128();
10109 assign(res,
10110 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10111 : binop(Iop_CmpGT64Ux2, argL, argR));
10112 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10113 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
10114 nameQRegLO(dd, Ity_I64),
10115 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10116 return True;
10119 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
10120 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
10121 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
10122 Bool isGE = bitU == 0;
10123 IRExpr* argL = getQReg128(nn);
10124 IRExpr* argR = getQReg128(mm);
10125 IRTemp res = newTempV128();
10126 assign(res,
10127 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
10128 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
10129 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10130 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
10131 nameQRegLO(dd, Ity_I64),
10132 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10133 return True;
10136 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
10137 || opcode == BITS5(0,1,0,1,0))) {
10138 /* -------- 0,xx,01000 SSHL d_d_d -------- */
10139 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
10140 /* -------- 1,xx,01000 USHL d_d_d -------- */
10141 /* -------- 1,xx,01010 URSHL d_d_d -------- */
10142 Bool isU = bitU == 1;
10143 Bool isR = opcode == BITS5(0,1,0,1,0);
10144 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
10145 : (isU ? mkVecSHU(size) : mkVecSHS(size));
10146 IRTemp res = newTempV128();
10147 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10148 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10149 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
10150 : (isU ? "ushl" : "sshl");
10151 DIP("%s %s, %s, %s\n", nm,
10152 nameQRegLO(dd, Ity_I64),
10153 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10154 return True;
10157 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
10158 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
10159 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
10160 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
10161 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
10162 Bool isU = bitU == 1;
10163 Bool isR = opcode == BITS5(0,1,0,1,1);
10164 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
10165 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
10166 /* This is a bit tricky. Since we're only interested in the lowest
10167 lane of the result, we zero out all the rest in the operands, so
10168 as to ensure that other lanes don't pollute the returned Q value.
10169 This works because it means, for the lanes we don't care about, we
10170 are shifting zero by zero, which can never saturate. */
10171 IRTemp res256 = newTemp(Ity_V256);
10172 IRTemp resSH = newTempV128();
10173 IRTemp resQ = newTempV128();
10174 IRTemp zero = newTempV128();
10175 assign(
10176 res256,
10177 binop(op,
10178 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
10179 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
10180 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
10181 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
10182 assign(zero, mkV128(0x0000));
10183 putQReg128(dd, mkexpr(resSH));
10184 updateQCFLAGwithDifference(resQ, zero);
10185 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
10186 : (isU ? "uqshl" : "sqshl");
10187 const HChar arr = "bhsd"[size];
10188 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10189 return True;
10192 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
10193 /* -------- 0,11,10000 ADD d_d_d -------- */
10194 /* -------- 1,11,10000 SUB d_d_d -------- */
10195 Bool isSUB = bitU == 1;
10196 IRTemp res = newTemp(Ity_I64);
10197 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
10198 getQRegLane(nn, 0, Ity_I64),
10199 getQRegLane(mm, 0, Ity_I64)));
10200 putQRegLane(dd, 0, mkexpr(res));
10201 putQRegLane(dd, 1, mkU64(0));
10202 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
10203 nameQRegLO(dd, Ity_I64),
10204 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10205 return True;
10208 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
10209 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
10210 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
10211 Bool isEQ = bitU == 1;
10212 IRExpr* argL = getQReg128(nn);
10213 IRExpr* argR = getQReg128(mm);
10214 IRTemp res = newTempV128();
10215 assign(res,
10216 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10217 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
10218 binop(Iop_AndV128, argL, argR),
10219 mkV128(0x0000))));
10220 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10221 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
10222 nameQRegLO(dd, Ity_I64),
10223 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10224 return True;
10227 if (opcode == BITS5(1,0,1,1,0)) {
10228 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
10229 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
10230 if (size == X00 || size == X11) return False;
10231 Bool isR = bitU == 1;
10232 IRTemp res, sat1q, sat1n, vN, vM;
10233 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10234 newTempsV128_2(&vN, &vM);
10235 assign(vN, getQReg128(nn));
10236 assign(vM, getQReg128(mm));
10237 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10238 putQReg128(dd,
10239 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10240 updateQCFLAGwithDifference(
10241 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
10242 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
10243 const HChar arr = "bhsd"[size];
10244 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10245 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10246 return True;
10249 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
10250 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
10251 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
10252 IRTemp res = newTemp(ity);
10253 assign(res, unop(mkABSF(ity),
10254 triop(mkSUBF(ity),
10255 mkexpr(mk_get_IR_rounding_mode()),
10256 getQRegLO(nn,ity), getQRegLO(mm,ity))));
10257 putQReg128(dd, mkV128(0x0000));
10258 putQRegLO(dd, mkexpr(res));
10259 DIP("fabd %s, %s, %s\n",
10260 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10261 return True;
10264 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
10265 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
10266 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10267 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
10268 IRTemp res = newTemp(ity);
10269 assign(res, triop(mkMULF(ity),
10270 mkexpr(mk_get_IR_rounding_mode()),
10271 getQRegLO(nn,ity), getQRegLO(mm,ity)));
10272 putQReg128(dd, mkV128(0x0000));
10273 putQRegLO(dd, mkexpr(res));
10274 DIP("fmulx %s, %s, %s\n",
10275 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10276 return True;
10279 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
10280 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
10281 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
10282 Bool isD = size == X01;
10283 IRType ity = isD ? Ity_F64 : Ity_F32;
10284 Bool isGE = bitU == 1;
10285 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
10286 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
10287 IRTemp res = newTempV128();
10288 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
10289 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
10290 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10291 mkexpr(res))));
10292 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
10293 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10294 return True;
10297 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
10298 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
10299 Bool isD = size == X11;
10300 IRType ity = isD ? Ity_F64 : Ity_F32;
10301 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10302 IRTemp res = newTempV128();
10303 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
10304 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10305 mkexpr(res))));
10306 DIP("%s %s, %s, %s\n", "fcmgt",
10307 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10308 return True;
10311 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
10312 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
10313 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
10314 Bool isD = (size & 1) == 1;
10315 IRType ity = isD ? Ity_F64 : Ity_F32;
10316 Bool isGT = (size & 2) == 2;
10317 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
10318 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
10319 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
10320 IRTemp res = newTempV128();
10321 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
10322 unop(opABS, getQReg128(nn)))); // swapd
10323 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10324 mkexpr(res))));
10325 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
10326 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10327 return True;
10330 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
10331 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
10332 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
10333 Bool isSQRT = (size & 2) == 2;
10334 Bool isD = (size & 1) == 1;
10335 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
10336 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
10337 IRTemp res = newTempV128();
10338 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10339 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10340 mkexpr(res))));
10341 HChar c = isD ? 'd' : 's';
10342 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
10343 c, dd, c, nn, c, mm);
10344 return True;
10347 return False;
10348 # undef INSN
10351 static
10352 Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
10354 /* 31 29 28 23 21 20 15 10 9 4
10355 01 U 11110 size 0 m opcode 1 n d
10356 Decode fields: u,size,opcode
10358 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10359 if (INSN(31,30) != BITS2(0,1)
10360 || INSN(28,24) != BITS5(1,1,1,1,0)
10361 || INSN(21,21) != 0
10362 || INSN(10,10) != 1) {
10363 return False;
10365 UInt bitU = INSN(29,29);
10366 UInt size = INSN(23,22);
10367 UInt mm = INSN(20,16);
10368 UInt opcode = INSN(15,11);
10369 UInt nn = INSN(9,5);
10370 UInt dd = INSN(4,0);
10371 vassert(size < 4);
10372 vassert(mm < 32 && nn < 32 && dd < 32);
10374 if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10375 /* -------- xx,10000 SQRDMLAH s and h variants only -------- */
10376 /* -------- xx,10001 SQRDMLSH s and h variants only -------- */
10377 if (size == X00 || size == X11) return False;
10378 Bool isAdd = opcode == BITS5(1,0,0,0,0);
10380 IRTemp res, res_nosat, vD, vN, vM;
10381 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
10382 newTempsV128_3(&vD, &vN, &vM);
10383 assign(vD, getQReg128(dd));
10384 assign(vN, getQReg128(nn));
10385 assign(vM, getQReg128(mm));
10387 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
10388 putQReg128(dd,
10389 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10390 updateQCFLAGwithDifference(
10391 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)),
10392 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat)));
10394 const HChar arr = "bhsd"[size];
10395 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
10396 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10397 return True;
10400 return False;
10401 # undef INSN
10405 static
10406 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
10408 /* 31 29 28 23 21 16 11 9 4
10409 01 U 11110 size 10000 opcode 10 n d
10410 Decode fields: u,size,opcode
10411 */
10412 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10413 if (INSN(31,30) != BITS2(0,1)
10414 || INSN(28,24) != BITS5(1,1,1,1,0)
10415 || INSN(21,17) != BITS5(1,0,0,0,0)
10416 || INSN(11,10) != BITS2(1,0)) {
10417 return False;
10419 UInt bitU = INSN(29,29);
10420 UInt size = INSN(23,22);
10421 UInt opcode = INSN(16,12);
10422 UInt nn = INSN(9,5);
10423 UInt dd = INSN(4,0);
10424 vassert(size < 4);
10426 if (opcode == BITS5(0,0,0,1,1)) {
10427 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
10428 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
10429 /* These are a bit tricky (to say the least). See comments on
10430 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
10431 details. */
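/* Roughly: the addend in nn has the opposite signedness to the
accumulator in dd, and the sum saturates in dd's own signedness;
that is what the QADDEXT* ops provide.  The plain ADD result is
computed only so that QC can be set when it differs from the
saturated result. */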
10432 Bool isUSQADD = bitU == 1;
10433 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
10434 : mkVecQADDEXTUSSATSS(size);
10435 IROp nop = mkVecADD(size);
10436 IRTemp argL = newTempV128();
10437 IRTemp argR = newTempV128();
10438 assign(argL, getQReg128(nn));
10439 assign(argR, getQReg128(dd));
10440 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10441 size, binop(qop, mkexpr(argL), mkexpr(argR)));
10442 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10443 size, binop(nop, mkexpr(argL), mkexpr(argR)));
10444 putQReg128(dd, mkexpr(qres));
10445 updateQCFLAGwithDifference(qres, nres);
10446 const HChar arr = "bhsd"[size];
10447 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
10448 return True;
10451 if (opcode == BITS5(0,0,1,1,1)) {
10452 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
10453 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
10454 Bool isNEG = bitU == 1;
10455 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
10456 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
10457 getQReg128(nn), size );
10458 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
10459 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
10460 putQReg128(dd, mkexpr(qres));
10461 updateQCFLAGwithDifference(qres, nres);
10462 const HChar arr = "bhsd"[size];
10463 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
10464 return True;
10467 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
10468 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
10469 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
10470 Bool isGT = bitU == 0;
10471 IRExpr* argL = getQReg128(nn);
10472 IRExpr* argR = mkV128(0x0000);
10473 IRTemp res = newTempV128();
10474 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10475 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
10476 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10477 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
10478 return True;
10481 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
10482 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
10483 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
10484 Bool isEQ = bitU == 0;
10485 IRExpr* argL = getQReg128(nn);
10486 IRExpr* argR = mkV128(0x0000);
10487 IRTemp res = newTempV128();
10488 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10489 : unop(Iop_NotV128,
10490 binop(Iop_CmpGT64Sx2, argL, argR)));
10491 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10492 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
10493 return True;
10496 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
10497 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
10498 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10499 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
10500 getQReg128(nn))));
10501 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
10502 return True;
10505 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10506 /* -------- 0,11,01011 ABS d_d -------- */
10507 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10508 unop(Iop_Abs64x2, getQReg128(nn))));
10509 DIP("abs d%u, d%u\n", dd, nn);
10510 return True;
10513 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10514 /* -------- 1,11,01011 NEG d_d -------- */
10515 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10516 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
10517 DIP("neg d%u, d%u\n", dd, nn);
10518 return True;
10521 UInt ix = 0; /*INVALID*/
10522 if (size >= X10) {
10523 switch (opcode) {
10524 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
10525 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
10526 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
10527 default: break;
10530 if (ix > 0) {
10531 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
10532 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
10533 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
10534 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
10535 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
10536 Bool isD = size == X11;
10537 IRType ity = isD ? Ity_F64 : Ity_F32;
10538 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
10539 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
10540 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10541 IROp opCmp = Iop_INVALID;
10542 Bool swap = False;
10543 const HChar* nm = "??";
10544 switch (ix) {
10545 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
10546 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
10547 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
10548 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
10549 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
10550 default: vassert(0);
10552 IRExpr* zero = mkV128(0x0000);
10553 IRTemp res = newTempV128();
10554 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
10555 : binop(opCmp, getQReg128(nn), zero));
10556 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10557 mkexpr(res))));
10559 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10560 return True;
10563 if (opcode == BITS5(1,0,1,0,0)
10564 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10565 /* -------- 0,xx,10100: SQXTN -------- */
10566 /* -------- 1,xx,10100: UQXTN -------- */
10567 /* -------- 1,xx,10010: SQXTUN -------- */
10568 if (size == X11) return False;
10569 vassert(size < 3);
10570 IROp opN = Iop_INVALID;
10571 Bool zWiden = True;
10572 const HChar* nm = "??";
10573 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10574 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10576 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10577 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10579 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10580 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10582 else vassert(0);
10583 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10584 size+1, getQReg128(nn));
10585 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10586 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10587 putQReg128(dd, mkexpr(resN));
10588 /* This widens zero lanes to zero, and compares it against zero, so all
10589 of the non-participating lanes make no contribution to the
10590 Q flag state. */
10591 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10592 size, mkexpr(resN));
10593 updateQCFLAGwithDifference(src, resW);
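/* QC is set exactly when re-widening the narrowed result fails to
reproduce the original source lane, i.e. when the narrowing saturated. */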
10594 const HChar arrNarrow = "bhsd"[size];
10595 const HChar arrWide = "bhsd"[size+1];
10596 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10597 return True;
10600 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10601 /* -------- 1,01,10110 FCVTXN s_d -------- */
10602 /* Using Irrm_NEAREST here isn't right. The docs say "round to
10603 odd" but I don't know what that really means. */
10604 putQRegLO(dd,
10605 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10606 getQRegLO(nn, Ity_F64)));
10607 putQRegLane(dd, 1, mkU32(0));
10608 putQRegLane(dd, 1, mkU64(0));
10609 DIP("fcvtxn s%u, d%u\n", dd, nn);
10610 return True;
10613 ix = 0; /*INVALID*/
10614 switch (opcode) {
10615 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10616 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10617 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10618 default: break;
10620 if (ix > 0) {
10621 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10622 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10623 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10624 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10625 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10626 /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10627 /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10628 /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10629 /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10630 /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10631 Bool isD = (size & 1) == 1;
10632 IRType tyF = isD ? Ity_F64 : Ity_F32;
10633 IRType tyI = isD ? Ity_I64 : Ity_I32;
10634 IRRoundingMode irrm = 8; /*impossible*/
10635 HChar ch = '?';
10636 switch (ix) {
10637 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10638 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10639 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
10640 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10641 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10642 default: vassert(0);
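/* The 'a' (FCVTA*) case really wants round-to-nearest, ties away from
zero; Irrm_NEAREST is ties-to-even, so exact halfway values may convert
differently -- hence the "kludge?" above. */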
10644 IROp cvt = Iop_INVALID;
10645 if (bitU == 1) {
10646 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10647 } else {
10648 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10650 IRTemp src = newTemp(tyF);
10651 IRTemp res = newTemp(tyI);
10652 assign(src, getQRegLane(nn, 0, tyF));
10653 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10654 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10655 if (!isD) {
10656 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10658 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10659 HChar sOrD = isD ? 'd' : 's';
10660 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10661 sOrD, dd, sOrD, nn);
10662 return True;
10665 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10666 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10667 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10668 Bool isU = bitU == 1;
10669 Bool isD = (size & 1) == 1;
10670 IRType tyI = isD ? Ity_I64 : Ity_I32;
10671 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10672 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10673 IRTemp rm = mk_get_IR_rounding_mode();
10674 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10675 if (!isD) {
10676 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10678 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10679 HChar c = isD ? 'd' : 's';
10680 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10681 return True;
10684 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10685 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10686 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10687 Bool isSQRT = bitU == 1;
10688 Bool isD = (size & 1) == 1;
10689 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10690 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10691 IRTemp resV = newTempV128();
10692 assign(resV, unop(op, getQReg128(nn)));
10693 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10694 mkexpr(resV))));
10695 HChar c = isD ? 'd' : 's';
10696 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10697 return True;
10700 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10701 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10702 Bool isD = (size & 1) == 1;
10703 IRType ty = isD ? Ity_F64 : Ity_F32;
10704 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10705 IRTemp res = newTemp(ty);
10706 IRTemp rm = mk_get_IR_rounding_mode();
10707 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10708 putQReg128(dd, mkV128(0x0000));
10709 putQRegLane(dd, 0, mkexpr(res));
10710 HChar c = isD ? 'd' : 's';
10711 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10712 return True;
10715 return False;
10716 # undef INSN
10720 static
10721 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10723 /* 31 28 23 21 20 19 15 11 9 4
10724 01 U 11111 size L M m opcode H 0 n d
10725 Decode fields are: u,size,opcode
10726 M is really part of the mm register number. Individual
10727 cases need to inspect L and H though.
10728 */
10729 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10730 if (INSN(31,30) != BITS2(0,1)
10731 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10732 return False;
10734 UInt bitU = INSN(29,29);
10735 UInt size = INSN(23,22);
10736 UInt bitL = INSN(21,21);
10737 UInt bitM = INSN(20,20);
10738 UInt mmLO4 = INSN(19,16);
10739 UInt opcode = INSN(15,12);
10740 UInt bitH = INSN(11,11);
10741 UInt nn = INSN(9,5);
10742 UInt dd = INSN(4,0);
10743 vassert(size < 4);
10744 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10746 if (bitU == 0 && size >= X10
10747 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10748 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10749 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10750 Bool isD = (size & 1) == 1;
10751 Bool isSUB = opcode == BITS4(0,1,0,1);
10752 UInt index;
10753 if (!isD) index = (bitH << 1) | bitL;
10754 else if (isD && bitL == 0) index = bitH;
10755 else return False; // sz:L == x11 => unallocated encoding
10756 vassert(index < (isD ? 2 : 4));
10757 IRType ity = isD ? Ity_F64 : Ity_F32;
10758 IRTemp elem = newTemp(ity);
10759 UInt mm = (bitM << 4) | mmLO4;
10760 assign(elem, getQRegLane(mm, index, ity));
10761 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10762 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10763 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10764 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10765 IRTemp rm = mk_get_IR_rounding_mode();
10766 IRTemp t1 = newTempV128();
10767 IRTemp t2 = newTempV128();
10768 // FIXME: double rounding; use FMA primops instead
10769 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10770 assign(t2, triop(isSUB ? opSUB : opADD,
10771 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10772 putQReg128(dd,
10773 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10774 mkexpr(t2))));
10775 const HChar c = isD ? 'd' : 's';
10776 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10777 c, dd, c, nn, nameQReg128(mm), c, index);
10778 return True;
10781 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10782 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
10783 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10784 Bool isD = (size & 1) == 1;
10785 Bool isMULX = bitU == 1;
10786 UInt index;
10787 if (!isD) index = (bitH << 1) | bitL;
10788 else if (isD && bitL == 0) index = bitH;
10789 else return False; // sz:L == x11 => unallocated encoding
10790 vassert(index < (isD ? 2 : 4));
10791 IRType ity = isD ? Ity_F64 : Ity_F32;
10792 IRTemp elem = newTemp(ity);
10793 UInt mm = (bitM << 4) | mmLO4;
10794 assign(elem, getQRegLane(mm, index, ity));
10795 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10796 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10797 IRTemp rm = mk_get_IR_rounding_mode();
10798 IRTemp t1 = newTempV128();
10799 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10800 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10801 putQReg128(dd,
10802 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10803 mkexpr(t1))));
10804 const HChar c = isD ? 'd' : 's';
10805 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10806 c, dd, c, nn, nameQReg128(mm), c, index);
10807 return True;
10810 if (bitU == 0
10811 && (opcode == BITS4(1,0,1,1)
10812 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10813 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10814 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10815 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10816 /* Widens, and size refers to the narrowed lanes. */
10817 UInt ks = 3;
10818 switch (opcode) {
10819 case BITS4(1,0,1,1): ks = 0; break;
10820 case BITS4(0,0,1,1): ks = 1; break;
10821 case BITS4(0,1,1,1): ks = 2; break;
10822 default: vassert(0);
10824 vassert(ks >= 0 && ks <= 2);
10825 UInt mm = 32; // invalid
10826 UInt ix = 16; // invalid
10827 switch (size) {
10828 case X00:
10829 return False; // h_b_b[] case is not allowed
10830 case X01:
10831 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10832 case X10:
10833 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10834 case X11:
10835 return False; // q_d_d[] case is not allowed
10836 default:
10837 vassert(0);
10839 vassert(mm < 32 && ix < 16);
10840 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10841 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10842 newTempsV128_2(&vecN, &vecD);
10843 assign(vecN, getQReg128(nn));
10844 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10845 assign(vecD, getQReg128(dd));
10846 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10847 False/*!is2*/, size, "mas"[ks],
10848 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10849 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10850 putQReg128(dd, unop(opZHI, mkexpr(res)));
10851 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10852 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10853 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10854 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10856 const HChar* nm = ks == 0 ? "sqdmull"
10857 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10858 const HChar arrNarrow = "bhsd"[size];
10859 const HChar arrWide = "bhsd"[size+1];
10860 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
10861 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10862 return True;
10865 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
10866 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10867 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10868 UInt mm = 32; // invalid
10869 UInt ix = 16; // invalid
10870 switch (size) {
10871 case X00:
10872 return False; // b case is not allowed
10873 case X01:
10874 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10875 case X10:
10876 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10877 case X11:
10878 return False; // q case is not allowed
10879 default:
10880 vassert(0);
10882 vassert(mm < 32 && ix < 16);
10883 Bool isR = opcode == BITS4(1,1,0,1);
10884 IRTemp res, sat1q, sat1n, vN, vM;
10885 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10886 vN = newTempV128();
10887 assign(vN, getQReg128(nn));
10888 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10889 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10890 IROp opZHI = mkVecZEROHIxxOFV128(size);
10891 putQReg128(dd, unop(opZHI, mkexpr(res)));
10892 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10893 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10894 HChar ch = size == X01 ? 'h' : 's';
10895 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
10896 return True;
10899 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
10900 /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
10901 /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
10902 UInt mm = 32; // invalid
10903 UInt ix = 16; // invalid
10904 switch (size) {
10905 case X00:
10906 return False; // b case is not allowed
10907 case X01:
10908 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10909 case X10:
10910 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10911 case X11:
10912 return False; // d case is not allowed
10913 default:
10914 vassert(0);
10916 vassert(size < 4);
10917 vassert(mm < 32 && ix < 16);
10918 Bool isAdd = opcode == BITS4(1,1,0,1);
10920 IRTemp res, res_nosat, vD, vN, vM;
10921 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
10922 newTempsV128_2(&vD, &vN);
10923 assign(vD, getQReg128(dd));
10924 assign(vN, getQReg128(nn));
10925 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10927 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
10928 IROp opZHI = mkVecZEROHIxxOFV128(size);
10929 putQReg128(dd, unop(opZHI, mkexpr(res)));
10930 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
10932 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
10933 HChar ch = size == X01 ? 'h' : 's';
10934 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
10935 return True;
10938 return False;
10939 # undef INSN
10943 static
10944 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10946 /* 31 28 22 18 15 10 9 4
10947 0 q u 011110 immh immb opcode 1 n d
10948 Decode fields: u,opcode
10949 */
10950 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10951 if (INSN(31,31) != 0
10952 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10953 return False;
10955 UInt bitQ = INSN(30,30);
10956 UInt bitU = INSN(29,29);
10957 UInt immh = INSN(22,19);
10958 UInt immb = INSN(18,16);
10959 UInt opcode = INSN(15,11);
10960 UInt nn = INSN(9,5);
10961 UInt dd = INSN(4,0);
10963 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10964 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10965 /* -------- 1,00000 USHR std7_std7_#imm -------- */
10966 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10967 /* -------- 1,00010 USRA std7_std7_#imm -------- */
10968 /* laneTy, shift = case immh:immb of
10969 0001:xxx -> B, SHR:8-xxx
10970 001x:xxx -> H, SHR:16-xxxx
10971 01xx:xxx -> S, SHR:32-xxxxx
10972 1xxx:xxx -> D, SHR:64-xxxxxx
10973 other -> invalid
10974 */
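/* Worked example: immh:immb = 0101:100 selects S lanes with
shift = 32 - 12 = 20. */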
10975 UInt size = 0;
10976 UInt shift = 0;
10977 Bool isQ = bitQ == 1;
10978 Bool isU = bitU == 1;
10979 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10980 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10981 if (!ok || (bitQ == 0 && size == X11)) return False;
10982 vassert(size >= 0 && size <= 3);
10983 UInt lanebits = 8 << size;
10984 vassert(shift >= 1 && shift <= lanebits);
10985 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10986 IRExpr* src = getQReg128(nn);
10987 IRTemp shf = newTempV128();
10988 IRTemp res = newTempV128();
10989 if (shift == lanebits && isU) {
10990 assign(shf, mkV128(0x0000));
10991 } else {
10992 UInt nudge = 0;
10993 if (shift == lanebits) {
10994 vassert(!isU);
10995 nudge = 1;
10997 assign(shf, binop(op, src, mkU8(shift - nudge)));
10999 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
11000 : mkexpr(shf));
11001 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11002 HChar laneCh = "bhsd"[size];
11003 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11004 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
11005 : (isU ? "ushr" : "sshr");
11006 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11007 nameQReg128(dd), nLanes, laneCh,
11008 nameQReg128(nn), nLanes, laneCh, shift);
11009 return True;
11012 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
11013 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
11014 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
11015 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
11016 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
11017 /* laneTy, shift = case immh:immb of
11018 0001:xxx -> B, SHR:8-xxx
11019 001x:xxx -> H, SHR:16-xxxx
11020 01xx:xxx -> S, SHR:32-xxxxx
11021 1xxx:xxx -> D, SHR:64-xxxxxx
11022 other -> invalid
11023 */
11024 UInt size = 0;
11025 UInt shift = 0;
11026 Bool isQ = bitQ == 1;
11027 Bool isU = bitU == 1;
11028 Bool isAcc = opcode == BITS5(0,0,1,1,0);
11029 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11030 if (!ok || (bitQ == 0 && size == X11)) return False;
11031 vassert(size >= 0 && size <= 3);
11032 UInt lanebits = 8 << size;
11033 vassert(shift >= 1 && shift <= lanebits);
11034 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
11035 IRExpr* src = getQReg128(nn);
11036 IRTemp imm8 = newTemp(Ity_I8);
11037 assign(imm8, mkU8((UChar)(-shift)));
11038 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
11039 IRTemp shf = newTempV128();
11040 IRTemp res = newTempV128();
11041 assign(shf, binop(op, src, amt));
11042 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
11043 : mkexpr(shf));
11044 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11045 HChar laneCh = "bhsd"[size];
11046 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11047 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
11048 : (isU ? "urshr" : "srshr");
11049 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11050 nameQReg128(dd), nLanes, laneCh,
11051 nameQReg128(nn), nLanes, laneCh, shift);
11052 return True;
11055 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
11056 /* -------- 1,01000 SRI std7_std7_#imm -------- */
11057 /* laneTy, shift = case immh:immb of
11058 0001:xxx -> B, SHR:8-xxx
11059 001x:xxx -> H, SHR:16-xxxx
11060 01xx:xxx -> S, SHR:32-xxxxx
11061 1xxx:xxx -> D, SHR:64-xxxxxx
11062 other -> invalid
11063 */
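/* In outline: each result lane is (Vn.lane >>u shift), with the top
'shift' bits of the existing Vd.lane left unchanged -- that is what the
nmask computation below implements.  When shift equals the lane width
the destination is left entirely unmodified. */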
11064 UInt size = 0;
11065 UInt shift = 0;
11066 Bool isQ = bitQ == 1;
11067 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11068 if (!ok || (bitQ == 0 && size == X11)) return False;
11069 vassert(size >= 0 && size <= 3);
11070 UInt lanebits = 8 << size;
11071 vassert(shift >= 1 && shift <= lanebits);
11072 IRExpr* src = getQReg128(nn);
11073 IRTemp res = newTempV128();
11074 if (shift == lanebits) {
11075 assign(res, getQReg128(dd));
11076 } else {
11077 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
11078 IRExpr* nmask = binop(mkVecSHLN(size),
11079 mkV128(0xFFFF), mkU8(lanebits - shift));
11080 IRTemp tmp = newTempV128();
11081 assign(tmp, binop(Iop_OrV128,
11082 mkexpr(res),
11083 binop(Iop_AndV128, getQReg128(dd), nmask)));
11084 res = tmp;
11086 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11087 HChar laneCh = "bhsd"[size];
11088 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11089 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
11090 nameQReg128(dd), nLanes, laneCh,
11091 nameQReg128(nn), nLanes, laneCh, shift);
11092 return True;
11095 if (opcode == BITS5(0,1,0,1,0)) {
11096 /* -------- 0,01010 SHL std7_std7_#imm -------- */
11097 /* -------- 1,01010 SLI std7_std7_#imm -------- */
11098 /* laneTy, shift = case immh:immb of
11099 0001:xxx -> B, xxx
11100 001x:xxx -> H, xxxx
11101 01xx:xxx -> S, xxxxx
11102 1xxx:xxx -> D, xxxxxx
11103 other -> invalid
11104 */
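/* For SLI the low 'shift' bits of each Vd lane are preserved and the
remaining bits receive (Vn.lane << shift); plain SHL simply discards
the old Vd value. */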
11105 UInt size = 0;
11106 UInt shift = 0;
11107 Bool isSLI = bitU == 1;
11108 Bool isQ = bitQ == 1;
11109 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11110 if (!ok || (bitQ == 0 && size == X11)) return False;
11111 vassert(size >= 0 && size <= 3);
11112 /* The shift encoding has opposite sign for the leftwards case.
11113 Adjust shift to compensate. */
11114 UInt lanebits = 8 << size;
11115 shift = lanebits - shift;
11116 vassert(shift >= 0 && shift < lanebits);
11117 IROp op = mkVecSHLN(size);
11118 IRExpr* src = getQReg128(nn);
11119 IRTemp res = newTempV128();
11120 if (shift == 0) {
11121 assign(res, src);
11122 } else {
11123 assign(res, binop(op, src, mkU8(shift)));
11124 if (isSLI) {
11125 IRExpr* nmask = binop(mkVecSHRN(size),
11126 mkV128(0xFFFF), mkU8(lanebits - shift));
11127 IRTemp tmp = newTempV128();
11128 assign(tmp, binop(Iop_OrV128,
11129 mkexpr(res),
11130 binop(Iop_AndV128, getQReg128(dd), nmask)));
11131 res = tmp;
11134 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11135 HChar laneCh = "bhsd"[size];
11136 UInt nLanes = (isQ ? 128 : 64) / lanebits;
11137 const HChar* nm = isSLI ? "sli" : "shl";
11138 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
11139 nameQReg128(dd), nLanes, laneCh,
11140 nameQReg128(nn), nLanes, laneCh, shift);
11141 return True;
11144 if (opcode == BITS5(0,1,1,1,0)
11145 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
11146 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
11147 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
11148 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
11149 UInt size = 0;
11150 UInt shift = 0;
11151 Bool isQ = bitQ == 1;
11152 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11153 if (!ok || (bitQ == 0 && size == X11)) return False;
11154 vassert(size >= 0 && size <= 3);
11155 /* The shift encoding has opposite sign for the leftwards case.
11156 Adjust shift to compensate. */
11157 UInt lanebits = 8 << size;
11158 shift = lanebits - shift;
11159 vassert(shift >= 0 && shift < lanebits);
11160 const HChar* nm = NULL;
11161 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
11162 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
11163 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
11164 else vassert(0);
11165 IRTemp qDiff1 = IRTemp_INVALID;
11166 IRTemp qDiff2 = IRTemp_INVALID;
11167 IRTemp res = IRTemp_INVALID;
11168 IRTemp src = newTempV128();
11169 assign(src, getQReg128(nn));
11170 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
11171 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11172 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
11173 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
11174 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11175 DIP("%s %s.%s, %s.%s, #%u\n", nm,
11176 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
11177 return True;
11180 if (bitU == 0
11181 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
11182 /* -------- 0,10000 SHRN{,2} #imm -------- */
11183 /* -------- 0,10001 RSHRN{,2} #imm -------- */
11184 /* Narrows, and size is the narrow size. */
11185 UInt size = 0;
11186 UInt shift = 0;
11187 Bool is2 = bitQ == 1;
11188 Bool isR = opcode == BITS5(1,0,0,0,1);
11189 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11190 if (!ok || size == X11) return False;
11191 vassert(shift >= 1);
11192 IRTemp t1 = newTempV128();
11193 IRTemp t2 = newTempV128();
11194 IRTemp t3 = newTempV128();
11195 assign(t1, getQReg128(nn));
11196 assign(t2, isR ? binop(mkVecADD(size+1),
11197 mkexpr(t1),
11198 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
11199 : mkexpr(t1));
11200 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
11201 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
11202 putLO64andZUorPutHI64(is2, dd, t4);
11203 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11204 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11205 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
11206 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
11207 return True;
11210 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
11211 || (bitU == 1
11212 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
11213 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
11214 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
11215 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
11216 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
11217 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
11218 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
11219 UInt size = 0;
11220 UInt shift = 0;
11221 Bool is2 = bitQ == 1;
11222 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11223 if (!ok || size == X11) return False;
11224 vassert(shift >= 1 && shift <= (8 << size));
11225 const HChar* nm = "??";
11226 IROp op = Iop_INVALID;
11227 /* Decide on the name and the operation. */
11228 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
11229 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
11231 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
11232 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
11234 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
11235 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
11237 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
11238 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
11240 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
11241 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
11243 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
11244 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
11246 else vassert(0);
11247 /* Compute the result (Q, shifted value) pair. */
11248 IRTemp src128 = newTempV128();
11249 assign(src128, getQReg128(nn));
11250 IRTemp pair = newTempV128();
11251 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
11252 /* Update the result reg */
11253 IRTemp res64in128 = newTempV128();
11254 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
11255 putLO64andZUorPutHI64(is2, dd, res64in128);
11256 /* Update the Q flag. */
11257 IRTemp q64q64 = newTempV128();
11258 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
11259 IRTemp z128 = newTempV128();
11260 assign(z128, mkV128(0x0000));
11261 updateQCFLAGwithDifference(q64q64, z128);
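/* The QANDq*NARROW ops, as used here, deliver the narrowed lanes in
the lower 64 bits of their result and saturation indications in the
upper 64 bits; duplicating that upper half and comparing it against an
all-zeroes vector therefore sets QC exactly when some lane saturated. */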
11262 /* */
11263 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11264 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11265 DIP("%s %s.%s, %s.%s, #%u\n", nm,
11266 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
11267 return True;
11270 if (opcode == BITS5(1,0,1,0,0)) {
11271 /* -------- 0,10100 SSHLL{,2} #imm -------- */
11272 /* -------- 1,10100 USHLL{,2} #imm -------- */
11273 /* 31 28 22 18 15 9 4
11274 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
11275 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
11276 where Ta,Tb,sh
11277 = case immh of 1xxx -> invalid
11278 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
11279 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
11280 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
11281 0000 -> AdvSIMD modified immediate (???)
11282 */
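/* Each narrow source lane is first moved into the top half of its
double-width destination lane by interleaving with a zero vector; an
arithmetic (SSHLL) or logical (USHLL) right shift by
(narrow-lane-bits - sh) then yields the sign- or zero-extended value
multiplied by 2^sh. */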
11283 Bool isQ = bitQ == 1;
11284 Bool isU = bitU == 1;
11285 UInt immhb = (immh << 3) | immb;
11286 IRTemp src = newTempV128();
11287 IRTemp zero = newTempV128();
11288 IRExpr* res = NULL;
11289 UInt sh = 0;
11290 const HChar* ta = "??";
11291 const HChar* tb = "??";
11292 assign(src, getQReg128(nn));
11293 assign(zero, mkV128(0x0000));
11294 if (immh & 8) {
11295 /* invalid; don't assign to res */
11297 else if (immh & 4) {
11298 sh = immhb - 32;
11299 vassert(sh < 32); /* so 32-sh is 1..32 */
11300 ta = "2d";
11301 tb = isQ ? "4s" : "2s";
11302 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
11303 : mk_InterleaveLO32x4(src, zero);
11304 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
11306 else if (immh & 2) {
11307 sh = immhb - 16;
11308 vassert(sh < 16); /* so 16-sh is 1..16 */
11309 ta = "4s";
11310 tb = isQ ? "8h" : "4h";
11311 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
11312 : mk_InterleaveLO16x8(src, zero);
11313 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
11315 else if (immh & 1) {
11316 sh = immhb - 8;
11317 vassert(sh < 8); /* so 8-sh is 1..8 */
11318 ta = "8h";
11319 tb = isQ ? "16b" : "8b";
11320 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
11321 : mk_InterleaveLO8x16(src, zero);
11322 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
11323 } else {
11324 vassert(immh == 0);
11325 /* invalid; don't assign to res */
11327 /* */
11328 if (res) {
11329 putQReg128(dd, res);
11330 DIP("%cshll%s %s.%s, %s.%s, #%u\n",
11331 isU ? 'u' : 's', isQ ? "2" : "",
11332 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
11333 return True;
11335 return False;
11338 if (opcode == BITS5(1,1,1,0,0)) {
11339 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11340 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11341 /* If immh is of the form 00xx, the insn is invalid. */
11342 if (immh < BITS4(0,1,0,0)) return False;
11343 UInt size = 0;
11344 UInt fbits = 0;
11345 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11346 /* The following holds because immh is never zero. */
11347 vassert(ok);
11348 /* The following holds because immh >= 0100. */
11349 vassert(size == X10 || size == X11);
11350 Bool isD = size == X11;
11351 Bool isU = bitU == 1;
11352 Bool isQ = bitQ == 1;
11353 if (isD && !isQ) return False; /* reject .1d case */
11354 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11355 Double scale = two_to_the_minus(fbits);
11356 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11357 : IRExpr_Const(IRConst_F32( (Float)scale ));
11358 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11359 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
11360 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
11361 IRType tyF = isD ? Ity_F64 : Ity_F32;
11362 IRType tyI = isD ? Ity_I64 : Ity_I32;
11363 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11364 vassert(nLanes == 2 || nLanes == 4);
11365 for (UInt i = 0; i < nLanes; i++) {
11366 IRTemp src = newTemp(tyI);
11367 IRTemp res = newTemp(tyF);
11368 IRTemp rm = mk_get_IR_rounding_mode();
11369 assign(src, getQRegLane(nn, i, tyI));
11370 assign(res, triop(opMUL, mkexpr(rm),
11371 binop(opCVT, mkexpr(rm), mkexpr(src)),
11372 scaleE));
11373 putQRegLane(dd, i, mkexpr(res));
11375 if (!isQ) {
11376 putQRegLane(dd, 1, mkU64(0));
11378 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11379 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
11380 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11381 return True;
11384 if (opcode == BITS5(1,1,1,1,1)) {
11385 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
11386 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
11387 /* If immh is of the form 00xx, the insn is invalid. */
11388 if (immh < BITS4(0,1,0,0)) return False;
11389 UInt size = 0;
11390 UInt fbits = 0;
11391 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11392 /* The following holds because immh is never zero. */
11393 vassert(ok);
11394 /* The following holds because immh >= 0100. */
11395 vassert(size == X10 || size == X11);
11396 Bool isD = size == X11;
11397 Bool isU = bitU == 1;
11398 Bool isQ = bitQ == 1;
11399 if (isD && !isQ) return False; /* reject .1d case */
11400 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11401 Double scale = two_to_the_plus(fbits);
11402 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11403 : IRExpr_Const(IRConst_F32( (Float)scale ));
11404 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11405 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
11406 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
11407 IRType tyF = isD ? Ity_F64 : Ity_F32;
11408 IRType tyI = isD ? Ity_I64 : Ity_I32;
11409 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11410 vassert(nLanes == 2 || nLanes == 4);
11411 for (UInt i = 0; i < nLanes; i++) {
11412 IRTemp src = newTemp(tyF);
11413 IRTemp res = newTemp(tyI);
11414 IRTemp rm = newTemp(Ity_I32);
11415 assign(src, getQRegLane(nn, i, tyF));
11416 assign(rm, mkU32(Irrm_ZERO));
11417 assign(res, binop(opCVT, mkexpr(rm),
11418 triop(opMUL, mkexpr(rm),
11419 mkexpr(src), scaleE)));
11420 putQRegLane(dd, i, mkexpr(res));
11422 if (!isQ) {
11423 putQRegLane(dd, 1, mkU64(0));
11425 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11426 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
11427 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11428 return True;
11431 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11432 return False;
11433 # undef INSN
11437 static
11438 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
11440 /* 31 30 29 28 23 21 20 15 11 9 4
11441 0 Q U 01110 size 1 m opcode 00 n d
11442 Decode fields: u,opcode
11443 */
11444 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11445 if (INSN(31,31) != 0
11446 || INSN(28,24) != BITS5(0,1,1,1,0)
11447 || INSN(21,21) != 1
11448 || INSN(11,10) != BITS2(0,0)) {
11449 return False;
11451 UInt bitQ = INSN(30,30);
11452 UInt bitU = INSN(29,29);
11453 UInt size = INSN(23,22);
11454 UInt mm = INSN(20,16);
11455 UInt opcode = INSN(15,12);
11456 UInt nn = INSN(9,5);
11457 UInt dd = INSN(4,0);
11458 vassert(size < 4);
11459 Bool is2 = bitQ == 1;
11461 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
11462 /* -------- 0,0000 SADDL{2} -------- */
11463 /* -------- 1,0000 UADDL{2} -------- */
11464 /* -------- 0,0010 SSUBL{2} -------- */
11465 /* -------- 1,0010 USUBL{2} -------- */
11466 /* Widens, and size refers to the narrow lanes. */
11467 if (size == X11) return False;
11468 vassert(size <= 2);
11469 Bool isU = bitU == 1;
11470 Bool isADD = opcode == BITS4(0,0,0,0);
11471 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11472 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11473 IRTemp res = newTempV128();
11474 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11475 mkexpr(argL), mkexpr(argR)));
11476 putQReg128(dd, mkexpr(res));
11477 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11478 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11479 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
11480 : (isU ? "usubl" : "ssubl");
11481 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11482 nameQReg128(dd), arrWide,
11483 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11484 return True;
11487 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
11488 /* -------- 0,0001 SADDW{2} -------- */
11489 /* -------- 1,0001 UADDW{2} -------- */
11490 /* -------- 0,0011 SSUBW{2} -------- */
11491 /* -------- 1,0011 USUBW{2} -------- */
11492 /* Widens, and size refers to the narrow lanes. */
11493 if (size == X11) return False;
11494 vassert(size <= 2);
11495 Bool isU = bitU == 1;
11496 Bool isADD = opcode == BITS4(0,0,0,1);
11497 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11498 IRTemp res = newTempV128();
11499 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11500 getQReg128(nn), mkexpr(argR)));
11501 putQReg128(dd, mkexpr(res));
11502 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11503 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11504 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
11505 : (isU ? "usubw" : "ssubw");
11506 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11507 nameQReg128(dd), arrWide,
11508 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
11509 return True;
11512 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
11513 /* -------- 0,0100 ADDHN{2} -------- */
11514 /* -------- 1,0100 RADDHN{2} -------- */
11515 /* -------- 0,0110 SUBHN{2} -------- */
11516 /* -------- 1,0110 RSUBHN{2} -------- */
11517 /* Narrows, and size refers to the narrowed lanes. */
11518 if (size == X11) return False;
11519 vassert(size <= 2);
11520 const UInt shift[3] = { 8, 16, 32 };
11521 Bool isADD = opcode == BITS4(0,1,0,0);
11522 Bool isR = bitU == 1;
11523 /* Combined elements in wide lanes */
11524 IRTemp wide = newTempV128();
11525 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11526 getQReg128(nn), getQReg128(mm));
11527 if (isR) {
11528 wideE = binop(mkVecADD(size+1),
11529 wideE,
11530 mkexpr(math_VEC_DUP_IMM(size+1,
11531 1ULL << (shift[size]-1))));
11533 assign(wide, wideE);
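/* For the rounding (R*) forms, 2^(shift-1) -- half the weight of the
lowest bit that survives the shift below -- has just been added, so
taking the high half rounds to nearest (ties upwards) instead of
truncating. */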
11534 /* Top halves of elements, still in wide lanes */
11535 IRTemp shrd = newTempV128();
11536 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
11537 /* Elements now compacted into lower 64 bits */
11538 IRTemp new64 = newTempV128();
11539 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
11540 putLO64andZUorPutHI64(is2, dd, new64);
11541 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11542 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11543 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
11544 : (isR ? "rsubhn" : "subhn");
11545 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11546 nameQReg128(dd), arrNarrow,
11547 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11548 return True;
11551 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
11552 /* -------- 0,0101 SABAL{2} -------- */
11553 /* -------- 1,0101 UABAL{2} -------- */
11554 /* -------- 0,0111 SABDL{2} -------- */
11555 /* -------- 1,0111 UABDL{2} -------- */
11556 /* Widens, and size refers to the narrow lanes. */
11557 if (size == X11) return False;
11558 vassert(size <= 2);
11559 Bool isU = bitU == 1;
11560 Bool isACC = opcode == BITS4(0,1,0,1);
11561 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11562 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11563 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
11564 IRTemp res = newTempV128();
11565 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
11566 : mkexpr(abd));
11567 putQReg128(dd, mkexpr(res));
11568 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11569 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11570 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
11571 : (isU ? "uabdl" : "sabdl");
11572 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11573 nameQReg128(dd), arrWide,
11574 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11575 return True;
11578 if (opcode == BITS4(1,1,0,0)
11579 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
11580 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
11581 /* -------- 1,1100 UMULL{2} -------- */ // 0
11582 /* -------- 0,1000 SMLAL{2} -------- */ // 1
11583 /* -------- 1,1000 UMLAL{2} -------- */ // 1
11584 /* -------- 0,1010 SMLSL{2} -------- */ // 2
11585 /* -------- 1,1010 UMLSL{2} -------- */ // 2
11586 /* Widens, and size refers to the narrow lanes. */
11587 UInt ks = 3;
11588 switch (opcode) {
11589 case BITS4(1,1,0,0): ks = 0; break;
11590 case BITS4(1,0,0,0): ks = 1; break;
11591 case BITS4(1,0,1,0): ks = 2; break;
11592 default: vassert(0);
11594 vassert(ks >= 0 && ks <= 2);
11595 if (size == X11) return False;
11596 vassert(size <= 2);
11597 Bool isU = bitU == 1;
11598 IRTemp vecN = newTempV128();
11599 IRTemp vecM = newTempV128();
11600 IRTemp vecD = newTempV128();
11601 assign(vecN, getQReg128(nn));
11602 assign(vecM, getQReg128(mm));
11603 assign(vecD, getQReg128(dd));
11604 IRTemp res = IRTemp_INVALID;
11605 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
11606 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11607 putQReg128(dd, mkexpr(res));
11608 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11609 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11610 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
11611 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
11612 nameQReg128(dd), arrWide,
11613 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11614 return True;
11617 if (bitU == 0
11618 && (opcode == BITS4(1,1,0,1)
11619 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
11620 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
11621 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
11622 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
11623 /* Widens, and size refers to the narrow lanes. */
11624 UInt ks = 3;
11625 switch (opcode) {
11626 case BITS4(1,1,0,1): ks = 0; break;
11627 case BITS4(1,0,0,1): ks = 1; break;
11628 case BITS4(1,0,1,1): ks = 2; break;
11629 default: vassert(0);
11631 vassert(ks >= 0 && ks <= 2);
11632 if (size == X00 || size == X11) return False;
11633 vassert(size <= 2);
11634 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11635 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11636 newTempsV128_3(&vecN, &vecM, &vecD);
11637 assign(vecN, getQReg128(nn));
11638 assign(vecM, getQReg128(mm));
11639 assign(vecD, getQReg128(dd));
11640 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11641 is2, size, "mas"[ks],
11642 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11643 putQReg128(dd, mkexpr(res));
11644 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11645 updateQCFLAGwithDifference(sat1q, sat1n);
11646 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11647 updateQCFLAGwithDifference(sat2q, sat2n);
11649 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11650 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11651 const HChar* nm = ks == 0 ? "sqdmull"
11652 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11653 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11654 nameQReg128(dd), arrWide,
11655 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11656 return True;
11659 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11660 /* -------- 0,1110 PMULL{2} -------- */
11661 /* Widens, and size refers to the narrow lanes. */
11662 if (size != X00 && size != X11) return False;
11663 IRTemp res = IRTemp_INVALID;
11664 IRExpr* srcN = getQReg128(nn);
11665 IRExpr* srcM = getQReg128(mm);
11666 const HChar* arrNarrow = NULL;
11667 const HChar* arrWide = NULL;
11668 if (size == X00) {
11669 res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11670 srcN, srcM);
11671 arrNarrow = nameArr_Q_SZ(bitQ, size);
11672 arrWide = nameArr_Q_SZ(1, size+1);
11673 } else {
11674 /* The same thing as the X00 case, except we have to call
11675 a helper to do it. */
11676 vassert(size == X11);
11677 res = newTemp(Ity_V128);
11678 IROp slice
11679 = is2 ? Iop_V128HIto64 : Iop_V128to64;
11680 IRExpr** args
11681 = mkIRExprVec_3( IRExpr_VECRET(),
11682 unop(slice, srcN), unop(slice, srcM));
11683 IRDirty* di
11684 = unsafeIRDirty_1_N( res, 0/*regparms*/,
11685 "arm64g_dirtyhelper_PMULLQ",
11686 &arm64g_dirtyhelper_PMULLQ, args);
11687 stmt(IRStmt_Dirty(di));
11688 /* We can't use nameArr_Q_SZ for this because it can't deal with
11689 Q-sized (128 bit) results. Hence do it by hand. */
11690 arrNarrow = bitQ == 0 ? "1d" : "2d";
11691 arrWide = "1q";
11693 putQReg128(dd, mkexpr(res));
11694 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11695 nameQReg128(dd), arrWide,
11696 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11697 return True;
11700 return False;
11701 # undef INSN
11705 static
11706 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11708 /* 31 30 29 28 23 21 20 15 10 9 4
11709 0 Q U 01110 size 1 m opcode 1 n d
11710 Decode fields: u,size,opcode
11711 */
11712 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11713 if (INSN(31,31) != 0
11714 || INSN(28,24) != BITS5(0,1,1,1,0)
11715 || INSN(21,21) != 1
11716 || INSN(10,10) != 1) {
11717 return False;
11719 UInt bitQ = INSN(30,30);
11720 UInt bitU = INSN(29,29);
11721 UInt size = INSN(23,22);
11722 UInt mm = INSN(20,16);
11723 UInt opcode = INSN(15,11);
11724 UInt nn = INSN(9,5);
11725 UInt dd = INSN(4,0);
11726 vassert(size < 4);
11728 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11729 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11730 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11731 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11732 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11733 if (size == X11) return False;
11734 Bool isADD = opcode == BITS5(0,0,0,0,0);
11735 Bool isU = bitU == 1;
11736 /* Widen both args out, do the math, narrow to final result. */
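/* The add/sub in the widened domain cannot wrap, so shifting back down
by one bit (logical for the unsigned forms, arithmetic for the signed
forms) gives the truncated halved result that {S,U}H{ADD,SUB} require. */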
11737 IRTemp argL = newTempV128();
11738 IRTemp argLhi = IRTemp_INVALID;
11739 IRTemp argLlo = IRTemp_INVALID;
11740 IRTemp argR = newTempV128();
11741 IRTemp argRhi = IRTemp_INVALID;
11742 IRTemp argRlo = IRTemp_INVALID;
11743 IRTemp resHi = newTempV128();
11744 IRTemp resLo = newTempV128();
11745 IRTemp res = IRTemp_INVALID;
11746 assign(argL, getQReg128(nn));
11747 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11748 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
11749 assign(argR, getQReg128(mm));
11750 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11751 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
11752 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11753 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
11754 assign(resHi, binop(opSxR,
11755 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11756 mkU8(1)));
11757 assign(resLo, binop(opSxR,
11758 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11759 mkU8(1)));
11760 res = math_NARROW_LANES ( resHi, resLo, size );
11761 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11762 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
11763 : (isU ? "uhsub" : "shsub");
11764 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11765 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11766 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11767 return True;
11770 if (opcode == BITS5(0,0,0,1,0)) {
11771 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11772 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11773 if (bitQ == 0 && size == X11) return False; // implied 1d case
11774 Bool isU = bitU == 1;
11775 IRTemp argL = newTempV128();
11776 IRTemp argR = newTempV128();
11777 assign(argL, getQReg128(nn));
11778 assign(argR, getQReg128(mm));
11779 IRTemp res = math_RHADD(size, isU, argL, argR);
11780 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11781 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11782 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11783 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11784 return True;
11787 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
11788 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
11789 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
11790 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
11791 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
11792 if (bitQ == 0 && size == X11) return False; // implied 1d case
11793 Bool isADD = opcode == BITS5(0,0,0,0,1);
11794 Bool isU = bitU == 1;
11795 IROp qop = Iop_INVALID;
11796 IROp nop = Iop_INVALID;
11797 if (isADD) {
11798 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
11799 nop = mkVecADD(size);
11800 } else {
11801 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
11802 nop = mkVecSUB(size);
11804 IRTemp argL = newTempV128();
11805 IRTemp argR = newTempV128();
11806 IRTemp qres = newTempV128();
11807 IRTemp nres = newTempV128();
11808 assign(argL, getQReg128(nn));
11809 assign(argR, getQReg128(mm));
11810 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11811 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11812 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11813 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11814 putQReg128(dd, mkexpr(qres));
11815 updateQCFLAGwithDifference(qres, nres);
11816 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
11817 : (isU ? "uqsub" : "sqsub");
11818 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11819 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11820 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11821 return True;
11824 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
11825 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
11826 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
11827 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
11828 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
11829 Bool isORx = (size & 2) == 2;
11830 Bool invert = (size & 1) == 1;
11831 IRTemp res = newTempV128();
11832 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
11833 getQReg128(nn),
11834 invert ? unop(Iop_NotV128, getQReg128(mm))
11835 : getQReg128(mm)));
11836 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11837 const HChar* names[4] = { "and", "bic", "orr", "orn" };
11838 const HChar* ar = bitQ == 1 ? "16b" : "8b";
11839 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
11840 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
11841 return True;
11844 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
11845 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
11846 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
11847 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
11848 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
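/* All four are computed with the selector identity
(sel & n) | (~sel & other) == other ^ ((other ^ n) & sel),
using Vd as the selector for BSL, Vm for BIT and ~Vm for BIF;
EOR is simply Vm ^ Vn. */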
11849 IRTemp argD = newTempV128();
11850 IRTemp argN = newTempV128();
11851 IRTemp argM = newTempV128();
11852 assign(argD, getQReg128(dd));
11853 assign(argN, getQReg128(nn));
11854 assign(argM, getQReg128(mm));
11855 const IROp opXOR = Iop_XorV128;
11856 const IROp opAND = Iop_AndV128;
11857 const IROp opNOT = Iop_NotV128;
11858 IRTemp res = newTempV128();
11859 switch (size) {
11860 case BITS2(0,0): /* EOR */
11861 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
11862 break;
11863 case BITS2(0,1): /* BSL */
11864 assign(res, binop(opXOR, mkexpr(argM),
11865 binop(opAND,
11866 binop(opXOR, mkexpr(argM), mkexpr(argN)),
11867 mkexpr(argD))));
11868 break;
11869 case BITS2(1,0): /* BIT */
11870 assign(res, binop(opXOR, mkexpr(argD),
11871 binop(opAND,
11872 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11873 mkexpr(argM))));
11874 break;
11875 case BITS2(1,1): /* BIF */
11876 assign(res, binop(opXOR, mkexpr(argD),
11877 binop(opAND,
11878 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11879 unop(opNOT, mkexpr(argM)))));
11880 break;
11881 default:
11882 vassert(0);
11883 }
11884 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11885 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
11886 const HChar* arr = bitQ == 1 ? "16b" : "8b";
11887 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
11888 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11889 return True;
11892 if (opcode == BITS5(0,0,1,1,0)) {
11893 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
11894 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
11895 if (bitQ == 0 && size == X11) return False; // implied 1d case
11896 Bool isGT = bitU == 0;
11897 IRExpr* argL = getQReg128(nn);
11898 IRExpr* argR = getQReg128(mm);
11899 IRTemp res = newTempV128();
11900 assign(res,
11901 isGT ? binop(mkVecCMPGTS(size), argL, argR)
11902 : binop(mkVecCMPGTU(size), argL, argR));
11903 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11904 const HChar* nm = isGT ? "cmgt" : "cmhi";
11905 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11906 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11907 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11908 return True;
11911 if (opcode == BITS5(0,0,1,1,1)) {
11912 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
11913 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
11914 if (bitQ == 0 && size == X11) return False; // implied 1d case
11915 Bool isGE = bitU == 0;
11916 IRExpr* argL = getQReg128(nn);
11917 IRExpr* argR = getQReg128(mm);
11918 IRTemp res = newTempV128();
11919 assign(res,
11920 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
11921 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
11922 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11923 const HChar* nm = isGE ? "cmge" : "cmhs";
11924 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11925 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11926 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11927 return True;
11930 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
11931 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
11932 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
11933 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
11934 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
11935 if (bitQ == 0 && size == X11) return False; // implied 1d case
11936 Bool isU = bitU == 1;
11937 Bool isR = opcode == BITS5(0,1,0,1,0);
11938 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
11939 : (isU ? mkVecSHU(size) : mkVecSHS(size));
11940 IRTemp res = newTempV128();
11941 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11942 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11943 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
11944 : (isU ? "ushl" : "sshl");
11945 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11946 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11947 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11948 return True;
11951 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
11952 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
11953 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
11954 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
11955 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
11956 if (bitQ == 0 && size == X11) return False; // implied 1d case
11957 Bool isU = bitU == 1;
11958 Bool isR = opcode == BITS5(0,1,0,1,1);
11959 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
11960 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
11961 /* This is a bit tricky. If we're only interested in the lowest 64 bits
11962 of the result (viz, bitQ == 0), then we must adjust the operands to
11963 ensure that the upper part of the result, that we don't care about,
11964 doesn't pollute the returned Q value. To do this, zero out the upper
11965 operand halves beforehand. This works because it means, for the
11966 lanes we don't care about, we are shifting zero by zero, which can
11967 never saturate. */
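/* (For those ignored lanes, the Q information returned in the other half of
   the V256 result is then guaranteed to be zero, which is why resQ can
   simply be compared against an all-zero vector below.) */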
11968 IRTemp res256 = newTemp(Ity_V256);
11969 IRTemp resSH = newTempV128();
11970 IRTemp resQ = newTempV128();
11971 IRTemp zero = newTempV128();
11972 assign(res256, binop(op,
11973 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
11974 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
11975 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
11976 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
11977 assign(zero, mkV128(0x0000));
11978 putQReg128(dd, mkexpr(resSH));
11979 updateQCFLAGwithDifference(resQ, zero);
11980 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
11981 : (isU ? "uqshl" : "sqshl");
11982 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11983 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11984 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11985 return True;
11988 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
11989 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
11990 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
11991 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
11992 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
11993 if (bitQ == 0 && size == X11) return False; // implied 1d case
11994 Bool isU = bitU == 1;
11995 Bool isMAX = (opcode & 1) == 0;
11996 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11997 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11998 IRTemp t = newTempV128();
11999 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
12000 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
12001 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
12002 : (isU ? "umin" : "smin");
12003 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12004 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12005 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12006 return True;
12009 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
12010 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
12011 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
12012 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
12013 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
12014 if (size == X11) return False; // 1d/2d cases not allowed
12015 Bool isU = bitU == 1;
12016 Bool isACC = opcode == BITS5(0,1,1,1,1);
12017 vassert(size <= 2);
12018 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
12019 IRTemp t2 = newTempV128();
12020 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
12021 : mkexpr(t1));
12022 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12023 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
12024 : (isU ? "uabd" : "sabd");
12025 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12026 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12027 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12028 return True;
12031 if (opcode == BITS5(1,0,0,0,0)) {
12032 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
12033 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
12034 if (bitQ == 0 && size == X11) return False; // implied 1d case
12035 Bool isSUB = bitU == 1;
12036 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
12037 IRTemp t = newTempV128();
12038 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
12039 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
12040 const HChar* nm = isSUB ? "sub" : "add";
12041 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12042 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12043 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12044 return True;
12047 if (opcode == BITS5(1,0,0,0,1)) {
12048 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
12049 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
12050 if (bitQ == 0 && size == X11) return False; // implied 1d case
12051 Bool isEQ = bitU == 1;
12052 IRExpr* argL = getQReg128(nn);
12053 IRExpr* argR = getQReg128(mm);
12054 IRTemp res = newTempV128();
12055 assign(res,
12056 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12057 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
12058 binop(Iop_AndV128, argL, argR),
12059 mkV128(0x0000))));
12060 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12061 const HChar* nm = isEQ ? "cmeq" : "cmtst";
12062 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12063 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12064 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12065 return True;
12068 if (opcode == BITS5(1,0,0,1,0)) {
12069 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
12070 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
12071 if (bitQ == 0 && size == X11) return False; // implied 1d case
12072 Bool isMLS = bitU == 1;
12073 IROp opMUL = mkVecMUL(size);
12074 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
12075 IRTemp res = newTempV128();
12076 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
12077 assign(res, binop(opADDSUB,
12078 getQReg128(dd),
12079 binop(opMUL, getQReg128(nn), getQReg128(mm))));
12080 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12081 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12082 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
12083 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12084 return True;
12085 }
12086 return False;
12089 if (opcode == BITS5(1,0,0,1,1)) {
12090 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
12091 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
12092 if (bitQ == 0 && size == X11) return False; // implied 1d case
12093 Bool isPMUL = bitU == 1;
12094 const IROp opsPMUL[4]
12095 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
12096 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
12097 IRTemp res = newTempV128();
12098 if (opMUL != Iop_INVALID) {
12099 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
12100 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12101 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12102 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
12103 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12104 return True;
12105 }
12106 return False;
12109 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
12110 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
12111 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
12112 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
12113 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
12114 if (size == X11) return False;
12115 Bool isU = bitU == 1;
12116 Bool isMAX = opcode == BITS5(1,0,1,0,0);
12117 IRTemp vN = newTempV128();
12118 IRTemp vM = newTempV128();
12119 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
12120 : (isU ? mkVecMINU(size) : mkVecMINS(size));
12121 assign(vN, getQReg128(nn));
12122 assign(vM, getQReg128(mm));
12123 IRTemp res128 = newTempV128();
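/* Pairwise ops are built from the even/odd lane concatenations: with
   CatEvenLanes(vM,vN) supplying lanes 0,2,4,.. and CatOddLanes(vM,vN)
   supplying lanes 1,3,5,.., a single lanewise op yields
   op(lane0,lane1), op(lane2,lane3), ..., with the Vn pairs landing in the
   lower half of the result and the Vm pairs in the upper half, exactly as
   the pairwise instructions require. */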
12124 assign(res128,
12125 binop(op,
12126 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
12127 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
12128 /* In the half-width case, use Iop_CatEvenLanes32x4 to extract the
12129 half-width result from the full-width result. */
12130 IRExpr* res
12131 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
12132 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
12133 mkexpr(res128)))
12134 : mkexpr(res128);
12135 putQReg128(dd, res);
12136 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12137 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
12138 : (isU ? "uminp" : "sminp");
12139 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12140 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12141 return True;
12144 if (opcode == BITS5(1,0,1,1,0)) {
12145 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
12146 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
12147 if (size == X00 || size == X11) return False;
12148 Bool isR = bitU == 1;
12149 IRTemp res, sat1q, sat1n, vN, vM;
12150 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12151 newTempsV128_2(&vN, &vM);
12152 assign(vN, getQReg128(nn));
12153 assign(vM, getQReg128(mm));
12154 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12155 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12156 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12157 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12158 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12159 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
12160 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12161 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12162 return True;
12165 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
12166 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
12167 if (bitQ == 0 && size == X11) return False; // implied 1d case
12168 IRTemp vN = newTempV128();
12169 IRTemp vM = newTempV128();
12170 assign(vN, getQReg128(nn));
12171 assign(vM, getQReg128(mm));
12172 IRTemp res128 = newTempV128();
12173 assign(res128,
12174 binop(mkVecADD(size),
12175 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
12176 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
12177 /* In the half-width case, use Iop_CatEvenLanes32x4 to extract the
12178 half-width result from the full-width result. */
12179 IRExpr* res
12180 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
12181 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
12182 mkexpr(res128)))
12183 : mkexpr(res128);
12184 putQReg128(dd, res);
12185 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12186 DIP("addp %s.%s, %s.%s, %s.%s\n",
12187 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12188 return True;
12191 if (bitU == 0
12192 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12193 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12194 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12195 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12196 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12197 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
12198 Bool isD = (size & 1) == 1;
12199 if (bitQ == 0 && isD) return False; // implied 1d case
12200 Bool isMIN = (size & 2) == 2;
12201 Bool isNM = opcode == BITS5(1,1,0,0,0);
12202 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
12203 IRTemp res = newTempV128();
12204 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
12205 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12206 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12207 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
12208 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12209 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12210 return True;
12213 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
12214 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12215 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12216 Bool isD = (size & 1) == 1;
12217 Bool isSUB = (size & 2) == 2;
12218 if (bitQ == 0 && isD) return False; // implied 1d case
12219 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12220 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12221 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12222 IRTemp rm = mk_get_IR_rounding_mode();
12223 IRTemp t1 = newTempV128();
12224 IRTemp t2 = newTempV128();
12225 // FIXME: double rounding; use FMA primops instead
12226 assign(t1, triop(opMUL,
12227 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12228 assign(t2, triop(isSUB ? opSUB : opADD,
12229 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12230 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12231 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12232 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
12233 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12234 return True;
12237 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
12238 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12239 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12240 Bool isD = (size & 1) == 1;
12241 Bool isSUB = (size & 2) == 2;
12242 if (bitQ == 0 && isD) return False; // implied 1d case
12243 const IROp ops[4]
12244 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
12245 IROp op = ops[size];
12246 IRTemp rm = mk_get_IR_rounding_mode();
12247 IRTemp t1 = newTempV128();
12248 IRTemp t2 = newTempV128();
12249 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12250 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12251 putQReg128(dd, mkexpr(t2));
12252 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12253 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
12254 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12255 return True;
12258 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
12259 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12260 Bool isD = (size & 1) == 1;
12261 if (bitQ == 0 && isD) return False; // implied 1d case
12262 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12263 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12264 IRTemp rm = mk_get_IR_rounding_mode();
12265 IRTemp t1 = newTempV128();
12266 IRTemp t2 = newTempV128();
12267 // FIXME: use Abd primop instead?
12268 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12269 assign(t2, unop(opABS, mkexpr(t1)));
12270 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12271 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12272 DIP("fabd %s.%s, %s.%s, %s.%s\n",
12273 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12274 return True;
12277 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
12278 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12279 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12280 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12281 Bool isD = (size & 1) == 1;
12282 Bool isMULX = bitU == 0;
12283 if (bitQ == 0 && isD) return False; // implied 1d case
12284 IRTemp rm = mk_get_IR_rounding_mode();
12285 IRTemp t1 = newTempV128();
12286 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12287 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12288 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12289 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12290 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
12291 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12292 return True;
12295 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
12296 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12297 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12298 Bool isD = (size & 1) == 1;
12299 if (bitQ == 0 && isD) return False; // implied 1d case
12300 Bool isGE = bitU == 1;
12301 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
12302 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
12303 IRTemp t1 = newTempV128();
12304 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
12305 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
12306 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12307 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12308 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
12309 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12310 return True;
12313 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
12314 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12315 Bool isD = (size & 1) == 1;
12316 if (bitQ == 0 && isD) return False; // implied 1d case
12317 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12318 IRTemp t1 = newTempV128();
12319 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
12320 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12321 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12322 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
12323 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12324 return True;
12327 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
12328 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12329 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12330 Bool isD = (size & 1) == 1;
12331 Bool isGT = (size & 2) == 2;
12332 if (bitQ == 0 && isD) return False; // implied 1d case
12333 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
12334 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
12335 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12336 IRTemp t1 = newTempV128();
12337 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
12338 unop(opABS, getQReg128(nn)))); // swapd
12339 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12340 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12341 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
12342 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12343 return True;
12346 if (bitU == 1
12347 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12348 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12349 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12350 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12351 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12352 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
12353 Bool isD = (size & 1) == 1;
12354 if (bitQ == 0 && isD) return False; // implied 1d case
12355 Bool isMIN = (size & 2) == 2;
12356 Bool isNM = opcode == BITS5(1,1,0,0,0);
12357 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
12358 IRTemp srcN = newTempV128();
12359 IRTemp srcM = newTempV128();
12360 IRTemp preL = IRTemp_INVALID;
12361 IRTemp preR = IRTemp_INVALID;
12362 assign(srcN, getQReg128(nn));
12363 assign(srcM, getQReg128(mm));
12364 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12365 srcM, srcN, isD, bitQ);
12366 putQReg128(
12367 dd, math_MAYBE_ZERO_HI64_fromE(
12368 bitQ,
12369 binop(opMXX, mkexpr(preL), mkexpr(preR))));
12370 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12371 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
12372 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12373 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12374 return True;
12377 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
12378 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12379 Bool isD = size == X01;
12380 if (bitQ == 0 && isD) return False; // implied 1d case
12381 IRTemp srcN = newTempV128();
12382 IRTemp srcM = newTempV128();
12383 IRTemp preL = IRTemp_INVALID;
12384 IRTemp preR = IRTemp_INVALID;
12385 assign(srcN, getQReg128(nn));
12386 assign(srcM, getQReg128(mm));
12387 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12388 srcM, srcN, isD, bitQ);
12389 putQReg128(
12390 dd, math_MAYBE_ZERO_HI64_fromE(
12391 bitQ,
12392 triop(mkVecADDF(isD ? 3 : 2),
12393 mkexpr(mk_get_IR_rounding_mode()),
12394 mkexpr(preL), mkexpr(preR))));
12395 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12396 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
12397 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12398 return True;
12401 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
12402 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12403 Bool isD = (size & 1) == 1;
12404 if (bitQ == 0 && isD) return False; // implied 1d case
12405 vassert(size <= 1);
12406 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
12407 IROp op = ops[size];
12408 IRTemp rm = mk_get_IR_rounding_mode();
12409 IRTemp t1 = newTempV128();
12410 IRTemp t2 = newTempV128();
12411 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12412 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12413 putQReg128(dd, mkexpr(t2));
12414 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12415 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
12416 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12417 return True;
12420 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
12421 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12422 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12423 Bool isSQRT = (size & 2) == 2;
12424 Bool isD = (size & 1) == 1;
12425 if (bitQ == 0 && isD) return False; // implied 1d case
12426 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
12427 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
12428 IRTemp res = newTempV128();
12429 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12430 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12431 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12432 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
12433 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12434 return True;
12437 return False;
12438 # undef INSN
12439 }
12442 static
12443 Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
12444 {
12445 /* 31 30 29 28 23 21 20 15 14 10 9 4
12446 0 Q U 01110 size 0 m 1 opcode 1 n d
12447 Decode fields: u,size,opcode
12448 */
12449 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12450 if (INSN(31,31) != 0
12451 || INSN(28,24) != BITS5(0,1,1,1,0)
12452 || INSN(21,21) != 0
12453 || INSN(15,15) != 1
12454 || INSN(10,10) != 1) {
12455 return False;
12456 }
12457 UInt bitQ = INSN(30,30);
12458 UInt bitU = INSN(29,29);
12459 UInt size = INSN(23,22);
12460 UInt mm = INSN(20,16);
12461 UInt opcode = INSN(14,11);
12462 UInt nn = INSN(9,5);
12463 UInt dd = INSN(4,0);
12464 vassert(size < 4);
12465 vassert(mm < 32 && nn < 32 && dd < 32);
12467 if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
12468 /* -------- 1,xx,0000 SQRDMLAH s and h variants only -------- */
12469 /* -------- 1,xx,0001 SQRDMLSH s and h variants only -------- */
12470 if (size == X00 || size == X11) return False;
12471 Bool isAdd = opcode == BITS4(0,0,0,0);
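/* Roughly: each lane becomes SignedSat(d +/- rounded-high-half of 2*n*m),
   which is the ARMv8.1 SQRDMLAH/SQRDMLSH operation.  math_SQRDMLAH also
   produces a non-saturating reference value, purely so that
   updateQCFLAGwithDifferenceZHI can set FPSR.QC when the two disagree. */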
12473 IRTemp res, res_nosat, vD, vN, vM;
12474 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
12475 newTempsV128_3(&vD, &vN, &vM);
12476 assign(vD, getQReg128(dd));
12477 assign(vN, getQReg128(nn));
12478 assign(vM, getQReg128(mm));
12480 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
12481 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12482 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
12483 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12485 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12486 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
12487 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12488 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12489 return True;
12492 return False;
12493 # undef INSN
12494 }
12497 static
12498 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
12499 {
12500 /* 31 30 29 28 23 21 16 11 9 4
12501 0 Q U 01110 size 10000 opcode 10 n d
12502 Decode fields: U,size,opcode
12503 */
12504 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12505 if (INSN(31,31) != 0
12506 || INSN(28,24) != BITS5(0,1,1,1,0)
12507 || INSN(21,17) != BITS5(1,0,0,0,0)
12508 || INSN(11,10) != BITS2(1,0)) {
12509 return False;
12510 }
12511 UInt bitQ = INSN(30,30);
12512 UInt bitU = INSN(29,29);
12513 UInt size = INSN(23,22);
12514 UInt opcode = INSN(16,12);
12515 UInt nn = INSN(9,5);
12516 UInt dd = INSN(4,0);
12517 vassert(size < 4);
12519 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
12520 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
12521 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
12522 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
12523 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
12524 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
12525 vassert(size <= 2);
12526 IRTemp res = newTempV128();
12527 assign(res, unop(iops[size], getQReg128(nn)));
12528 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12529 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12530 DIP("%s %s.%s, %s.%s\n", "rev64",
12531 nameQReg128(dd), arr, nameQReg128(nn), arr);
12532 return True;
12535 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
12536 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
12537 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
12538 Bool isH = size == X01;
12539 IRTemp res = newTempV128();
12540 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
12541 assign(res, unop(iop, getQReg128(nn)));
12542 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12543 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12544 DIP("%s %s.%s, %s.%s\n", "rev32",
12545 nameQReg128(dd), arr, nameQReg128(nn), arr);
12546 return True;
12549 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
12550 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
12551 IRTemp res = newTempV128();
12552 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
12553 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12554 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12555 DIP("%s %s.%s, %s.%s\n", "rev16",
12556 nameQReg128(dd), arr, nameQReg128(nn), arr);
12557 return True;
12560 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
12561 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
12562 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
12563 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
12564 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
12565 /* Widens, and size refers to the narrow size. */
12566 if (size == X11) return False; // no 1d or 2d cases
12567 Bool isU = bitU == 1;
12568 Bool isACC = opcode == BITS5(0,0,1,1,0);
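/* The widening pairwise add is synthesised by sign/zero-extending the even
   and the odd numbered lanes separately to double width and adding the two
   widened vectors; the xADALP forms then fold the previous contents of Vd
   into the sum. */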
12569 IRTemp src = newTempV128();
12570 IRTemp sum = newTempV128();
12571 IRTemp res = newTempV128();
12572 assign(src, getQReg128(nn));
12573 assign(sum,
12574 binop(mkVecADD(size+1),
12575 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12576 isU, True/*fromOdd*/, size, mkexpr(src))),
12577 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12578 isU, False/*!fromOdd*/, size, mkexpr(src)))));
12579 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
12580 : mkexpr(sum));
12581 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12582 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12583 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
12584 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
12585 : (isU ? "uaddlp" : "saddlp"),
12586 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12587 return True;
12590 if (opcode == BITS5(0,0,0,1,1)) {
12591 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
12592 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
12593 if (bitQ == 0 && size == X11) return False; // implied 1d case
12594 Bool isUSQADD = bitU == 1;
12595 /* This is switched (in the US vs SU sense) deliberately.
12596 SUQADD corresponds to the ExtUSsatSS variants and
12597 USQADD corresponds to the ExtSUsatUU variants.
12598 See libvex_ir for more details. */
12599 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
12600 : mkVecQADDEXTUSSATSS(size);
12601 IROp nop = mkVecADD(size);
12602 IRTemp argL = newTempV128();
12603 IRTemp argR = newTempV128();
12604 IRTemp qres = newTempV128();
12605 IRTemp nres = newTempV128();
12606 /* Because the two arguments to the addition are implicitly
12607 extended differently (one signedly, the other unsignedly) it is
12608 important to present them to the primop in the correct order. */
12609 assign(argL, getQReg128(nn));
12610 assign(argR, getQReg128(dd));
12611 assign(qres, math_MAYBE_ZERO_HI64_fromE(
12612 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
12613 assign(nres, math_MAYBE_ZERO_HI64_fromE(
12614 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
12615 putQReg128(dd, mkexpr(qres));
12616 updateQCFLAGwithDifference(qres, nres);
12617 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12618 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
12619 nameQReg128(dd), arr, nameQReg128(nn), arr);
12620 return True;
12623 if (opcode == BITS5(0,0,1,0,0)) {
12624 /* -------- 0,xx,00100: CLS std6_std6 -------- */
12625 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
12626 if (size == X11) return False; // no 1d or 2d cases
12627 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
12628 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
12629 Bool isCLZ = bitU == 1;
12630 IRTemp res = newTempV128();
12631 vassert(size <= 2);
12632 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
12633 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12634 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12635 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
12636 nameQReg128(dd), arr, nameQReg128(nn), arr);
12637 return True;
12640 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
12641 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
12642 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
12643 IRTemp res = newTempV128();
12644 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
12645 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12646 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12647 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
12648 nameQReg128(dd), arr, nameQReg128(nn), arr);
12649 return True;
12652 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
12653 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
12654 IRTemp res = newTempV128();
12655 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
12656 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12657 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12658 DIP("%s %s.%s, %s.%s\n", "rbit",
12659 nameQReg128(dd), arr, nameQReg128(nn), arr);
12660 return True;
12663 if (opcode == BITS5(0,0,1,1,1)) {
12664 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
12665 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
12666 if (bitQ == 0 && size == X11) return False; // implied 1d case
12667 Bool isNEG = bitU == 1;
12668 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
12669 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
12670 getQReg128(nn), size );
12671 IRTemp qres = newTempV128(), nres = newTempV128();
12672 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
12673 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
12674 putQReg128(dd, mkexpr(qres));
12675 updateQCFLAGwithDifference(qres, nres);
12676 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12677 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
12678 nameQReg128(dd), arr, nameQReg128(nn), arr);
12679 return True;
12682 if (opcode == BITS5(0,1,0,0,0)) {
12683 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
12684 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
12685 if (bitQ == 0 && size == X11) return False; // implied 1d case
12686 Bool isGT = bitU == 0;
12687 IRExpr* argL = getQReg128(nn);
12688 IRExpr* argR = mkV128(0x0000);
12689 IRTemp res = newTempV128();
12690 IROp opGTS = mkVecCMPGTS(size);
12691 assign(res, isGT ? binop(opGTS, argL, argR)
12692 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
12693 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12694 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12695 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
12696 nameQReg128(dd), arr, nameQReg128(nn), arr);
12697 return True;
12700 if (opcode == BITS5(0,1,0,0,1)) {
12701 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
12702 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
12703 if (bitQ == 0 && size == X11) return False; // implied 1d case
12704 Bool isEQ = bitU == 0;
12705 IRExpr* argL = getQReg128(nn);
12706 IRExpr* argR = mkV128(0x0000);
12707 IRTemp res = newTempV128();
12708 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12709 : unop(Iop_NotV128,
12710 binop(mkVecCMPGTS(size), argL, argR)));
12711 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12712 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12713 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
12714 nameQReg128(dd), arr, nameQReg128(nn), arr);
12715 return True;
12718 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
12719 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
12720 if (bitQ == 0 && size == X11) return False; // implied 1d case
12721 IRExpr* argL = getQReg128(nn);
12722 IRExpr* argR = mkV128(0x0000);
12723 IRTemp res = newTempV128();
12724 assign(res, binop(mkVecCMPGTS(size), argR, argL));
12725 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12726 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12727 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
12728 nameQReg128(dd), arr, nameQReg128(nn), arr);
12729 return True;
12732 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
12733 /* -------- 0,xx,01011: ABS std7_std7 -------- */
12734 if (bitQ == 0 && size == X11) return False; // implied 1d case
12735 IRTemp res = newTempV128();
12736 assign(res, unop(mkVecABS(size), getQReg128(nn)));
12737 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12738 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12739 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12740 return True;
12743 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
12744 /* -------- 1,xx,01011: NEG std7_std7 -------- */
12745 if (bitQ == 0 && size == X11) return False; // implied 1d case
12746 IRTemp res = newTempV128();
12747 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
12748 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12749 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12750 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12751 return True;
12754 UInt ix = 0; /*INVALID*/
12755 if (size >= X10) {
12756 switch (opcode) {
12757 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
12758 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
12759 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
12760 default: break;
12763 if (ix > 0) {
12764 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
12765 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
12766 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
12767 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
12768 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
12769 if (bitQ == 0 && size == X11) return False; // implied 1d case
12770 Bool isD = size == X11;
12771 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
12772 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
12773 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12774 IROp opCmp = Iop_INVALID;
12775 Bool swap = False;
12776 const HChar* nm = "??";
12777 switch (ix) {
12778 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
12779 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
12780 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
12781 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
12782 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
12783 default: vassert(0);
12785 IRExpr* zero = mkV128(0x0000);
12786 IRTemp res = newTempV128();
12787 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
12788 : binop(opCmp, getQReg128(nn), zero));
12789 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12790 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12791 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
12792 nameQReg128(dd), arr, nameQReg128(nn), arr);
12793 return True;
12796 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
12797 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
12798 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
12799 if (bitQ == 0 && size == X11) return False; // implied 1d case
12800 Bool isFNEG = bitU == 1;
12801 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
12802 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
12803 IRTemp res = newTempV128();
12804 assign(res, unop(op, getQReg128(nn)));
12805 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12806 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12807 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
12808 nameQReg128(dd), arr, nameQReg128(nn), arr);
12809 return True;
12812 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12813 /* -------- 0,xx,10010: XTN{,2} -------- */
12814 if (size == X11) return False;
12815 vassert(size < 3);
12816 Bool is2 = bitQ == 1;
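/* XTN narrows into the low 64 bits of Vd and zeroes the upper half; XTN2
   (the is2 case) instead writes the upper 64 bits and leaves the lower half
   untouched -- hence putLO64andZUorPutHI64 below. */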
12817 IROp opN = mkVecNARROWUN(size);
12818 IRTemp resN = newTempV128();
12819 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
12820 putLO64andZUorPutHI64(is2, dd, resN);
12821 const HChar* nm = "xtn";
12822 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12823 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12824 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12825 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12826 return True;
12829 if (opcode == BITS5(1,0,1,0,0)
12830 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
12831 /* -------- 0,xx,10100: SQXTN{,2} -------- */
12832 /* -------- 1,xx,10100: UQXTN{,2} -------- */
12833 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
12834 if (size == X11) return False;
12835 vassert(size < 3);
12836 Bool is2 = bitQ == 1;
12837 IROp opN = Iop_INVALID;
12838 Bool zWiden = True;
12839 const HChar* nm = "??";
12840 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
12841 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
12842 }
12843 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
12844 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
12845 }
12846 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12847 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
12848 }
12849 else vassert(0);
12850 IRTemp src = newTempV128();
12851 assign(src, getQReg128(nn));
12852 IRTemp resN = newTempV128();
12853 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
12854 putLO64andZUorPutHI64(is2, dd, resN);
12855 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
12856 size, mkexpr(resN));
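/* Saturation detection: re-widen the narrowed result and compare it with
   the original source.  Any lane that saturated will differ, which is what
   updateQCFLAGwithDifference turns into FPSR.QC. */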
12857 updateQCFLAGwithDifference(src, resW);
12858 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12859 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12860 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12861 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12862 return True;
12865 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12866 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
12867 /* Widens, and size is the narrow size. */
12868 if (size == X11) return False;
12869 Bool is2 = bitQ == 1;
12870 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
12871 IROp opSHL = mkVecSHLN(size+1);
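/* Interleaving the source with itself leaves a copy of each narrow lane in
   both halves of the corresponding wide lane; shifting each wide lane left
   by the narrow lane width then yields exactly the zero-extended,
   shifted-by-#lane-width value that SHLL is defined to produce. */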
12872 IRTemp src = newTempV128();
12873 IRTemp res = newTempV128();
12874 assign(src, getQReg128(nn));
12875 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
12876 mkU8(8 << size)));
12877 putQReg128(dd, mkexpr(res));
12878 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12879 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12880 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
12881 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
12882 return True;
12885 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
12886 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
12887 UInt nLanes = size == X00 ? 4 : 2;
12888 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
12889 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
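/* bitQ distinguishes FCVTN (write the narrowed lanes to the low half of Vd
   and zero the top 64 bits) from FCVTN2 (write the top half, preserving the
   low 64 bits), which is why the destination lane index below is offset by
   nLanes * bitQ. */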
12890 IRTemp rm = mk_get_IR_rounding_mode();
12891 IRTemp src[nLanes];
12892 for (UInt i = 0; i < nLanes; i++) {
12893 src[i] = newTemp(srcTy);
12894 assign(src[i], getQRegLane(nn, i, srcTy));
12896 for (UInt i = 0; i < nLanes; i++) {
12897 putQRegLane(dd, nLanes * bitQ + i,
12898 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
12900 if (bitQ == 0) {
12901 putQRegLane(dd, 1, mkU64(0));
12903 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12904 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12905 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12906 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12907 return True;
12910 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
12911 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
12912 /* Using Irrm_NEAREST here isn't right. FCVTXN is defined to use "round to
12913 odd": if the conversion is inexact, the result's LSB is forced to 1, so that a later narrowing cannot double-round. VEX's IRRoundingMode appears to have no such mode, so this remains a kludge. */
12914 IRType srcTy = Ity_F64;
12915 IROp opCvt = Iop_F64toF32;
12916 IRTemp src[2];
12917 for (UInt i = 0; i < 2; i++) {
12918 src[i] = newTemp(srcTy);
12919 assign(src[i], getQRegLane(nn, i, srcTy));
12921 for (UInt i = 0; i < 2; i++) {
12922 putQRegLane(dd, 2 * bitQ + i,
12923 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
12925 if (bitQ == 0) {
12926 putQRegLane(dd, 1, mkU64(0));
12928 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12929 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12930 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12931 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12932 return True;
12935 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
12936 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
12937 UInt nLanes = size == X00 ? 4 : 2;
12938 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
12939 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
12940 IRTemp src[nLanes];
12941 for (UInt i = 0; i < nLanes; i++) {
12942 src[i] = newTemp(srcTy);
12943 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
12945 for (UInt i = 0; i < nLanes; i++) {
12946 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
12948 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12949 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12950 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12951 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12952 return True;
12955 ix = 0;
12956 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12957 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12958 // = 1 + bitU[0]:size[1]:opcode[0]
12959 vassert(ix >= 1 && ix <= 8);
12960 if (ix == 7) ix = 0;
12962 if (ix > 0) {
12963 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12964 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12965 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12966 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12967 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12968 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12969 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
12970 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12971 /* rm plan:
12972 FRINTN: tieeven -- !! FIXME KLUDGED !!
12973 FRINTM: -inf
12974 FRINTP: +inf
12975 FRINTZ: zero
12976 FRINTA: tieaway -- !! FIXME KLUDGED !!
12977 FRINTX: per FPCR + "exact = TRUE"
12978 FRINTI: per FPCR
12980 Bool isD = (size & 1) == 1;
12981 if (bitQ == 0 && isD) return False; // implied 1d case
12983 IRTemp irrmRM = mk_get_IR_rounding_mode();
12985 UChar ch = '?';
12986 IRTemp irrm = newTemp(Ity_I32);
12987 switch (ix) {
12988 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12989 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12990 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12991 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12992 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12993 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12994 // FRINTX rounds per FPCR (like FRINTI) but is additionally defined to raise
12995 // the Inexact exception when the result differs from the operand ("integral exact"). That trap isn't modelled here, so FRINTX is treated the same as FRINTI.
12996 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12997 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12998 default: vassert(0);
13001 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
13002 if (isD) {
13003 for (UInt i = 0; i < 2; i++) {
13004 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
13005 getQRegLane(nn, i, Ity_F64)));
13007 } else {
13008 UInt n = bitQ==1 ? 4 : 2;
13009 for (UInt i = 0; i < n; i++) {
13010 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
13011 getQRegLane(nn, i, Ity_F32)));
13013 if (bitQ == 0)
13014 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
13016 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13017 DIP("frint%c %s.%s, %s.%s\n", ch,
13018 nameQReg128(dd), arr, nameQReg128(nn), arr);
13019 return True;
13022 ix = 0; /*INVALID*/
13023 switch (opcode) {
13024 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
13025 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
13026 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
13027 default: break;
13029 if (ix > 0) {
13030 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13031 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13032 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13033 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13034 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13035 /* -------- 1,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13036 /* -------- 1,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13037 /* -------- 1,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13038 /* -------- 1,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13039 /* -------- 1,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13040 Bool isD = (size & 1) == 1;
13041 if (bitQ == 0 && isD) return False; // implied 1d case
13043 IRRoundingMode irrm = 8; /*impossible*/
13044 HChar ch = '?';
13045 switch (ix) {
13046 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
13047 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
13048 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
13049 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
13050 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
13051 default: vassert(0);
13053 IROp cvt = Iop_INVALID;
13054 if (bitU == 1) {
13055 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
13056 } else {
13057 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
13059 if (isD) {
13060 for (UInt i = 0; i < 2; i++) {
13061 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
13062 getQRegLane(nn, i, Ity_F64)));
13064 } else {
13065 UInt n = bitQ==1 ? 4 : 2;
13066 for (UInt i = 0; i < n; i++) {
13067 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
13068 getQRegLane(nn, i, Ity_F32)));
13070 if (bitQ == 0)
13071 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
13073 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13074 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
13075 nameQReg128(dd), arr, nameQReg128(nn), arr);
13076 return True;
13079 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
13080 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
13081 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
13082 Bool isREC = bitU == 0;
13083 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
13084 IRTemp res = newTempV128();
13085 assign(res, unop(op, getQReg128(nn)));
13086 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13087 const HChar* nm = isREC ? "urecpe" : "ursqrte";
13088 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13089 DIP("%s %s.%s, %s.%s\n", nm,
13090 nameQReg128(dd), arr, nameQReg128(nn), arr);
13091 return True;
13094 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
13095 /* -------- 0,0x,11101: SCVTF -------- */
13096 /* -------- 1,0x,11101: UCVTF -------- */
13097 /* 31 28 22 21 15 9 4
13098 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
13099 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
13100 with laneage:
13101 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
13103 Bool isQ = bitQ == 1;
13104 Bool isU = bitU == 1;
13105 Bool isF64 = (size & 1) == 1;
13106 if (isQ || !isF64) {
13107 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
13108 UInt nLanes = 0;
13109 Bool zeroHI = False;
13110 const HChar* arrSpec = NULL;
13111 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
13112 isQ, isF64 );
13113 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
13114 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
13115 IRTemp rm = mk_get_IR_rounding_mode();
13116 UInt i;
13117 vassert(ok); /* the 'if' above should ensure this */
13118 for (i = 0; i < nLanes; i++) {
13119 putQRegLane(dd, i,
13120 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
13122 if (zeroHI) {
13123 putQRegLane(dd, 1, mkU64(0));
13125 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
13126 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
13127 return True;
13129 /* else fall through */
13132 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
13133 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
13134 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
13135 Bool isSQRT = bitU == 1;
13136 Bool isD = (size & 1) == 1;
13137 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
13138 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
13139 if (bitQ == 0 && isD) return False; // implied 1d case
13140 IRTemp resV = newTempV128();
13141 assign(resV, unop(op, getQReg128(nn)));
13142 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
13143 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13144 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
13145 nameQReg128(dd), arr, nameQReg128(nn), arr);
13146 return True;
13149 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
13150 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
13151 Bool isD = (size & 1) == 1;
13152 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
13153 if (bitQ == 0 && isD) return False; // implied 1d case
13154 IRTemp resV = newTempV128();
13155 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
13156 getQReg128(nn)));
13157 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
13158 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13159 DIP("%s %s.%s, %s.%s\n", "fsqrt",
13160 nameQReg128(dd), arr, nameQReg128(nn), arr);
13161 return True;
13164 return False;
13165 # undef INSN
13166 }
13169 static
13170 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
13171 {
13172 /* 31 28 23 21 20 19 15 11 9 4
13173 0 Q U 01111 size L M m opcode H 0 n d
13174 Decode fields are: u,size,opcode
13175 M is really part of the mm register number. Individual
13176 cases need to inspect L and H though.
13177 */
13178 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13179 if (INSN(31,31) != 0
13180 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
13181 return False;
13182 }
13183 UInt bitQ = INSN(30,30);
13184 UInt bitU = INSN(29,29);
13185 UInt size = INSN(23,22);
13186 UInt bitL = INSN(21,21);
13187 UInt bitM = INSN(20,20);
13188 UInt mmLO4 = INSN(19,16);
13189 UInt opcode = INSN(15,12);
13190 UInt bitH = INSN(11,11);
13191 UInt nn = INSN(9,5);
13192 UInt dd = INSN(4,0);
13193 vassert(size < 4);
13194 vassert(bitH < 2 && bitM < 2 && bitL < 2);
13196 if (bitU == 0 && size >= X10
13197 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
13198 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13199 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13200 if (bitQ == 0 && size == X11) return False; // implied 1d case
13201 Bool isD = (size & 1) == 1;
13202 Bool isSUB = opcode == BITS4(0,1,0,1);
13203 UInt index;
13204 if (!isD) index = (bitH << 1) | bitL;
13205 else if (isD && bitL == 0) index = bitH;
13206 else return False; // sz:L == x11 => unallocated encoding
13207 vassert(index < (isD ? 2 : 4));
13208 IRType ity = isD ? Ity_F64 : Ity_F32;
13209 IRTemp elem = newTemp(ity);
13210 UInt mm = (bitM << 4) | mmLO4;
13211 assign(elem, getQRegLane(mm, index, ity));
13212 IRTemp dupd = math_DUP_TO_V128(elem, ity);
13213 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
13214 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
13215 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
13216 IRTemp rm = mk_get_IR_rounding_mode();
13217 IRTemp t1 = newTempV128();
13218 IRTemp t2 = newTempV128();
13219 // FIXME: double rounding; use FMA primops instead
13220 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
13221 assign(t2, triop(isSUB ? opSUB : opADD,
13222 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
13223 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
13224 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13225 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
13226 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
13227 isD ? 'd' : 's', index);
13228 return True;
13231 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
13232 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13233 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
13234 if (bitQ == 0 && size == X11) return False; // implied 1d case
13235 Bool isD = (size & 1) == 1;
13236 Bool isMULX = bitU == 1;
13237 UInt index;
13238 if (!isD) index = (bitH << 1) | bitL;
13239 else if (isD && bitL == 0) index = bitH;
13240 else return False; // sz:L == x11 => unallocated encoding
13241 vassert(index < (isD ? 2 : 4));
13242 IRType ity = isD ? Ity_F64 : Ity_F32;
13243 IRTemp elem = newTemp(ity);
13244 UInt mm = (bitM << 4) | mmLO4;
13245 assign(elem, getQRegLane(mm, index, ity));
13246 IRTemp dupd = math_DUP_TO_V128(elem, ity);
13247 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
13248 IRTemp res = newTempV128();
13249 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
13250 mkexpr(mk_get_IR_rounding_mode()),
13251 getQReg128(nn), mkexpr(dupd)));
13252 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13253 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13254 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
13255 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
13256 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
13257 return True;
13260 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
13261 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
13262 /* -------- 1,xx,0000 MLA s/h variants only -------- */
13263 /* -------- 1,xx,0100 MLS s/h variants only -------- */
13264 /* -------- 0,xx,1000 MUL s/h variants only -------- */
13265 Bool isMLA = opcode == BITS4(0,0,0,0);
13266 Bool isMLS = opcode == BITS4(0,1,0,0);
13267 UInt mm = 32; // invalid
13268 UInt ix = 16; // invalid
13269 switch (size) {
13270 case X00:
13271 return False; // b case is not allowed
13272 case X01:
13273 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13274 case X10:
13275 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13276 case X11:
13277 return False; // d case is not allowed
13278 default:
13279 vassert(0);
13281 vassert(mm < 32 && ix < 16);
13282 IROp opMUL = mkVecMUL(size);
13283 IROp opADD = mkVecADD(size);
13284 IROp opSUB = mkVecSUB(size);
13285 HChar ch = size == X01 ? 'h' : 's';
13286 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13287 IRTemp vecD = newTempV128();
13288 IRTemp vecN = newTempV128();
13289 IRTemp res = newTempV128();
13290 assign(vecD, getQReg128(dd));
13291 assign(vecN, getQReg128(nn));
13292 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
13293 if (isMLA || isMLS) {
13294 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
13295 } else {
13296 assign(res, prod);
13298 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13299 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13300 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
13301 : (isMLS ? "mls" : "mul"),
13302 nameQReg128(dd), arr,
13303 nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13304 return True;
13307 if (opcode == BITS4(1,0,1,0)
13308 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
13309 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
13310 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
13311 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
13312 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
13313 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
13314 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
13315 /* Widens, and size refers to the narrowed lanes. */
13316 UInt ks = 3;
13317 switch (opcode) {
13318 case BITS4(1,0,1,0): ks = 0; break;
13319 case BITS4(0,0,1,0): ks = 1; break;
13320 case BITS4(0,1,1,0): ks = 2; break;
13321 default: vassert(0);
13323 vassert(ks >= 0 && ks <= 2);
13324 Bool isU = bitU == 1;
13325 Bool is2 = bitQ == 1;
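/* is2 selects the "2" (second-half) form, which takes its narrow
   source elements from the upper 64 bits of the source vectors. */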
13326 UInt mm = 32; // invalid
13327 UInt ix = 16; // invalid
13328 switch (size) {
13329 case X00:
13330 return False; // h_b_b[] case is not allowed
13331 case X01:
13332 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13333 case X10:
13334 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13335 case X11:
13336 return False; // q_d_d[] case is not allowed
13337 default:
13338 vassert(0);
13340 vassert(mm < 32 && ix < 16);
13341 IRTemp vecN = newTempV128();
13342 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13343 IRTemp vecD = newTempV128();
13344 assign(vecN, getQReg128(nn));
13345 assign(vecD, getQReg128(dd));
13346 IRTemp res = IRTemp_INVALID;
13347 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
13348 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
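/* The "mas"[ks] character tells math_MULL_ACC whether to generate a
   plain widening multiply ('m'), a multiply-accumulate ('a') or a
   multiply-subtract ('s'); for ks == 0 there is no accumulator, hence
   the IRTemp_INVALID argument. */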
13349 putQReg128(dd, mkexpr(res));
13350 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
13351 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13352 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13353 HChar ch = size == X01 ? 'h' : 's';
13354 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13355 isU ? 'u' : 's', nm, is2 ? "2" : "",
13356 nameQReg128(dd), arrWide,
13357 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13358 return True;
13361 if (bitU == 0
13362 && (opcode == BITS4(1,0,1,1)
13363 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
13364 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
13365 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
13366 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
13367 /* Widens, and size refers to the narrowed lanes. */
13368 UInt ks = 3;
13369 switch (opcode) {
13370 case BITS4(1,0,1,1): ks = 0; break;
13371 case BITS4(0,0,1,1): ks = 1; break;
13372 case BITS4(0,1,1,1): ks = 2; break;
13373 default: vassert(0);
13375 vassert(ks >= 0 && ks <= 2);
13376 Bool is2 = bitQ == 1;
13377 UInt mm = 32; // invalid
13378 UInt ix = 16; // invalid
13379 switch (size) {
13380 case X00:
13381 return False; // h_b_b[] case is not allowed
13382 case X01:
13383 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13384 case X10:
13385 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13386 case X11:
13387 return False; // q_d_d[] case is not allowed
13388 default:
13389 vassert(0);
13391 vassert(mm < 32 && ix < 16);
13392 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
13393 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
13394 newTempsV128_2(&vecN, &vecD);
13395 assign(vecN, getQReg128(nn));
13396 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13397 assign(vecD, getQReg128(dd));
13398 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
13399 is2, size, "mas"[ks],
13400 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13401 putQReg128(dd, mkexpr(res));
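/* sat1q/sat1n (and possibly sat2q/sat2n) hold saturated vs
   unsaturated versions of the intermediate results; the
   updateQCFLAGwithDifference calls below set the sticky QC
   (cumulative saturation) flag whenever a pair differs, i.e. whenever
   saturation actually occurred. */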
13402 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
13403 updateQCFLAGwithDifference(sat1q, sat1n);
13404 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
13405 updateQCFLAGwithDifference(sat2q, sat2n);
13407 const HChar* nm = ks == 0 ? "sqdmull"
13408 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
13409 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13410 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13411 HChar ch = size == X01 ? 'h' : 's';
13412 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13413 nm, is2 ? "2" : "",
13414 nameQReg128(dd), arrWide,
13415 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13416 return True;
13419 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
13420 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
13421 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
13422 UInt mm = 32; // invalid
13423 UInt ix = 16; // invalid
13424 switch (size) {
13425 case X00:
13426 return False; // b case is not allowed
13427 case X01:
13428 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13429 case X10:
13430 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13431 case X11:
13432 return False; // q case is not allowed
13433 default:
13434 vassert(0);
13436 vassert(mm < 32 && ix < 16);
13437 Bool isR = opcode == BITS4(1,1,0,1);
13438 IRTemp res, sat1q, sat1n, vN, vM;
13439 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
13440 vN = newTempV128();
13441 assign(vN, getQReg128(nn));
13442 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13443 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
13444 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13445 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13446 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
13447 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
13448 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13449 HChar ch = size == X01 ? 'h' : 's';
13450 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13451 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13452 return True;
13455 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
13456 /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
13457 /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
13458 UInt mm = 32; // invalid
13459 UInt ix = 16; // invalid
13460 switch (size) {
13461 case X00:
13462 return False; // b case is not allowed
13463 case X01: // h
13464 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13465 case X10: // s
13466 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13467 case X11:
13468 return False; // d case is not allowed
13469 default:
13470 vassert(0);
13472 vassert(mm < 32 && ix < 16);
13474 IRTemp res, res_nosat, vD, vN, vM;
13475 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
13476 newTempsV128_2(&vD, &vN);
13477 assign(vD, getQReg128(dd));
13478 assign(vN, getQReg128(nn));
13480 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13481 Bool isAdd = opcode == BITS4(1,1,0,1);
13482 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
13483 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13484 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
13485 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13487 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13488 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
13489 HChar ch = size == X01 ? 'h' : 's';
13490 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13491 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13492 return True;
13495 return False;
13496 # undef INSN
13500 static
13501 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
13503 /* 31 23 21 16 11 9 4
13504 0100 1110 size 10100 opcode 10 n d
13505 Decode fields are: size,opcode
13506 Size is always 00 in ARMv8, it appears.
13508 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13509 if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
13510 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13511 return False;
13513 UInt size = INSN(23,22);
13514 UInt opcode = INSN(16,12);
13515 UInt nn = INSN(9,5);
13516 UInt dd = INSN(4,0);
13518 if (size == BITS2(0,0)
13519 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
13520 /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
13521 /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
13522 Bool isD = opcode == BITS5(0,0,1,0,1);
13523 IRTemp op1 = newTemp(Ity_V128);
13524 IRTemp op2 = newTemp(Ity_V128);
13525 IRTemp xord = newTemp(Ity_V128);
13526 IRTemp res = newTemp(Ity_V128);
13527 void* helper = isD ? &arm64g_dirtyhelper_AESD
13528 : &arm64g_dirtyhelper_AESE;
13529 const HChar* hname = isD ? "arm64g_dirtyhelper_AESD"
13530 : "arm64g_dirtyhelper_AESE";
13531 assign(op1, getQReg128(dd));
13532 assign(op2, getQReg128(nn));
13533 assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
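/* AESE/AESD begin with an AddRoundKey step, which is just the XOR of
   the two operands computed above; the dirty helper then performs the
   remaining SubBytes/ShiftRows steps (or their inverses for AESD) on
   the result. */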
13534 IRDirty* di
13535 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13536 mkIRExprVec_3(
13537 IRExpr_VECRET(),
13538 unop(Iop_V128HIto64, mkexpr(xord)),
13539 unop(Iop_V128to64, mkexpr(xord)) ) );
13540 stmt(IRStmt_Dirty(di));
13541 putQReg128(dd, mkexpr(res));
13542 DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
13543 nameQReg128(dd), nameQReg128(nn));
13544 return True;
13547 if (size == BITS2(0,0)
13548 && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
13549 /* -------- 00,00110: AESMC Vd.16b, Vn.16b -------- */
13550 /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
13551 Bool isI = opcode == BITS5(0,0,1,1,1);
13552 IRTemp src = newTemp(Ity_V128);
13553 IRTemp res = newTemp(Ity_V128);
13554 void* helper = isI ? &arm64g_dirtyhelper_AESIMC
13555 : &arm64g_dirtyhelper_AESMC;
13556 const HChar* hname = isI ? "arm64g_dirtyhelper_AESIMC"
13557 : "arm64g_dirtyhelper_AESMC";
13558 assign(src, getQReg128(nn));
13559 IRDirty* di
13560 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13561 mkIRExprVec_3(
13562 IRExpr_VECRET(),
13563 unop(Iop_V128HIto64, mkexpr(src)),
13564 unop(Iop_V128to64, mkexpr(src)) ) );
13565 stmt(IRStmt_Dirty(di));
13566 putQReg128(dd, mkexpr(res));
13567 DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
13568 nameQReg128(dd), nameQReg128(nn));
13569 return True;
13572 return False;
13573 # undef INSN
13577 static
13578 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13580 /* 31 28 23 21 20 15 14 11 9 4
13581 0101 1110 sz 0 m 0 opc 00 n d
13582 Decode fields are: sz,opc
13584 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13585 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
13586 || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
13587 return False;
13589 UInt sz = INSN(23,22);
13590 UInt mm = INSN(20,16);
13591 UInt opc = INSN(14,12);
13592 UInt nn = INSN(9,5);
13593 UInt dd = INSN(4,0);
13594 if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
13595 /* -------- 00,000 SHA1C Qd, Sn, Vm.4S -------- */
13596 /* -------- 00,001 SHA1P Qd, Sn, Vm.4S -------- */
13597 /* -------- 00,010 SHA1M Qd, Sn, Vm.4S -------- */
13598 /* -------- 00,011 SHA1SU0 Vd.4S, Vn.4S, Vm.4S -------- */
13599 /* -------- 00,100 SHA256H Qd, Qn, Vm.4S -------- */
13600 /* -------- 00,101 SHA256H2 Qd, Qn, Vm.4S -------- */
13601 /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
13602 vassert(opc < 7);
13603 const HChar* inames[7]
13604 = { "sha1c", "sha1p", "sha1m", "sha1su0",
13605 "sha256h", "sha256h2", "sha256su1" };
13606 void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
13607 = { &arm64g_dirtyhelper_SHA1C, &arm64g_dirtyhelper_SHA1P,
13608 &arm64g_dirtyhelper_SHA1M, &arm64g_dirtyhelper_SHA1SU0,
13609 &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
13610 &arm64g_dirtyhelper_SHA256SU1 };
13611 const HChar* hnames[7]
13612 = { "arm64g_dirtyhelper_SHA1C", "arm64g_dirtyhelper_SHA1P",
13613 "arm64g_dirtyhelper_SHA1M", "arm64g_dirtyhelper_SHA1SU0",
13614 "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
13615 "arm64g_dirtyhelper_SHA256SU1" };
13616 IRTemp vD = newTemp(Ity_V128);
13617 IRTemp vN = newTemp(Ity_V128);
13618 IRTemp vM = newTemp(Ity_V128);
13619 IRTemp vDhi = newTemp(Ity_I64);
13620 IRTemp vDlo = newTemp(Ity_I64);
13621 IRTemp vNhiPre = newTemp(Ity_I64);
13622 IRTemp vNloPre = newTemp(Ity_I64);
13623 IRTemp vNhi = newTemp(Ity_I64);
13624 IRTemp vNlo = newTemp(Ity_I64);
13625 IRTemp vMhi = newTemp(Ity_I64);
13626 IRTemp vMlo = newTemp(Ity_I64);
13627 assign(vD, getQReg128(dd));
13628 assign(vN, getQReg128(nn));
13629 assign(vM, getQReg128(mm));
13630 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13631 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13632 assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
13633 assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
13634 assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
13635 assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
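/* The 128-bit operands are handed to the helpers as hi/lo 64-bit
   halves, since dirty helpers only take integer-typed arguments, it
   appears; the vector result comes back through IRExpr_VECRET(). */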
13636 /* Mask off any bits of the N register operand that aren't actually
13637 needed, so that Memcheck doesn't complain unnecessarily. */
13638 switch (opc) {
13639 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13640 assign(vNhi, mkU64(0));
13641 assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
13642 break;
13643 case BITS3(0,1,1): case BITS3(1,0,0):
13644 case BITS3(1,0,1): case BITS3(1,1,0):
13645 assign(vNhi, mkexpr(vNhiPre));
13646 assign(vNlo, mkexpr(vNloPre));
13647 break;
13648 default:
13649 vassert(0);
13651 IRTemp res = newTemp(Ity_V128);
13652 IRDirty* di
13653 = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
13654 mkIRExprVec_7(
13655 IRExpr_VECRET(),
13656 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
13657 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
13658 stmt(IRStmt_Dirty(di));
13659 putQReg128(dd, mkexpr(res));
13660 switch (opc) {
13661 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13662 DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
13663 break;
13664 case BITS3(0,1,1): case BITS3(1,1,0):
13665 DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
13666 break;
13667 case BITS3(1,0,0): case BITS3(1,0,1):
13668 DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
13669 break;
13670 default:
13671 vassert(0);
13673 return True;
13676 return False;
13677 # undef INSN
13681 static
13682 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13684 /* 31 28 23 21 16 11 9 4
13685 0101 1110 sz 10100 opc 10 n d
13686 Decode fields are: sz,opc
13688 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13689 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
13690 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13691 return False;
13693 UInt sz = INSN(23,22);
13694 UInt opc = INSN(16,12);
13695 UInt nn = INSN(9,5);
13696 UInt dd = INSN(4,0);
13697 if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
13698 /* -------- 00,00000 SHA1H Sd, Sn -------- */
13699 /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
13700 /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
13701 vassert(opc < 3);
13702 const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
13703 IRTemp vD = newTemp(Ity_V128);
13704 IRTemp vN = newTemp(Ity_V128);
13705 IRTemp vDhi = newTemp(Ity_I64);
13706 IRTemp vDlo = newTemp(Ity_I64);
13707 IRTemp vNhi = newTemp(Ity_I64);
13708 IRTemp vNlo = newTemp(Ity_I64);
13709 assign(vD, getQReg128(dd));
13710 assign(vN, getQReg128(nn));
13711 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13712 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13713 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
13714 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
13715 /* Mask off any bits of the N register operand that aren't actually
13716 needed, so that Memcheck doesn't complain unnecessarily. Also
13717 construct the calls, given that the helper functions don't take
13718 the same number of arguments. */
13719 IRDirty* di = NULL;
13720 IRTemp res = newTemp(Ity_V128);
13721 switch (opc) {
13722 case BITS5(0,0,0,0,0): {
13723 IRExpr* vNloMasked = unop(Iop_32Uto64,
13724 unop(Iop_64to32, mkexpr(vNlo)));
13725 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13726 "arm64g_dirtyhelper_SHA1H",
13727 &arm64g_dirtyhelper_SHA1H,
13728 mkIRExprVec_3(
13729 IRExpr_VECRET(),
13730 mkU64(0), vNloMasked) );
13731 break;
13733 case BITS5(0,0,0,0,1):
13734 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13735 "arm64g_dirtyhelper_SHA1SU1",
13736 &arm64g_dirtyhelper_SHA1SU1,
13737 mkIRExprVec_5(
13738 IRExpr_VECRET(),
13739 mkexpr(vDhi), mkexpr(vDlo),
13740 mkexpr(vNhi), mkexpr(vNlo)) );
13741 break;
13742 case BITS5(0,0,0,1,0):
13743 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13744 "arm64g_dirtyhelper_SHA256SU0",
13745 &arm64g_dirtyhelper_SHA256SU0,
13746 mkIRExprVec_5(
13747 IRExpr_VECRET(),
13748 mkexpr(vDhi), mkexpr(vDlo),
13749 mkexpr(vNhi), mkexpr(vNlo)) );
13750 break;
13751 default:
13752 vassert(0);
13754 stmt(IRStmt_Dirty(di));
13755 putQReg128(dd, mkexpr(res));
13756 switch (opc) {
13757 case BITS5(0,0,0,0,0):
13758 DIP("%s s%u, s%u\n", inames[opc], dd, nn);
13759 break;
13760 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
13761 DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
13762 break;
13763 default:
13764 vassert(0);
13766 return True;
13769 return False;
13770 # undef INSN
13774 static
13775 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13777 /* 31 28 23 21 20 15 13 9 4
13778 000 11110 ty 1 m op 1000 n opcode2
13779 The first 3 bits are really "M 0 S", but M and S are always zero.
13780 Decode fields are: ty,op,opcode2
13782 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13783 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13784 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
13785 return False;
13787 UInt ty = INSN(23,22);
13788 UInt mm = INSN(20,16);
13789 UInt op = INSN(15,14);
13790 UInt nn = INSN(9,5);
13791 UInt opcode2 = INSN(4,0);
13792 vassert(ty < 4);
13794 if (ty <= X01 && op == X00
13795 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
13796 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
13797 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
13798 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
13799 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
13800 /* 31 23 20 15 9 4
13801 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
13802 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
13803 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
13804 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
13806 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
13807 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
13808 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
13809 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
13811 FCMPE generates Invalid Operation exn if either arg is any kind
13812 of NaN. FCMP generates Invalid Operation exn if either arg is a
13813 signalling NaN. We ignore this detail here and produce the same
13814 IR for both.
13816 Bool isD = (ty & 1) == 1;
13817 Bool isCMPE = (opcode2 & 16) == 16;
13818 Bool cmpZero = (opcode2 & 8) == 8;
13819 IRType ity = isD ? Ity_F64 : Ity_F32;
13820 Bool valid = True;
13821 if (cmpZero && mm != 0) valid = False;
13822 if (valid) {
13823 IRTemp argL = newTemp(ity);
13824 IRTemp argR = newTemp(ity);
13825 IRTemp irRes = newTemp(Ity_I32);
13826 assign(argL, getQRegLO(nn, ity));
13827 assign(argR,
13828 cmpZero
13829 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
13830 : getQRegLO(mm, ity));
13831 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13832 mkexpr(argL), mkexpr(argR)));
13833 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
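/* The comparison result is converted to an ARM NZCV nibble and then
   shifted into bits 31:28, which is where setFlags_COPY expects the
   flags to live in the 64-bit value it is given. */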
13834 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13835 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
13836 setFlags_COPY(nzcv_28x0);
13837 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
13838 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
13839 return True;
13841 return False;
13844 return False;
13845 # undef INSN
13849 static
13850 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13852 /* 31 28 23 21 20 15 11 9 4 3
13853 000 11110 ty 1 m cond 01 n op nzcv
13854 The first 3 bits are really "M 0 S", but M and S are always zero.
13855 Decode fields are: ty,op
13857 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13858 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13859 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
13860 return False;
13862 UInt ty = INSN(23,22);
13863 UInt mm = INSN(20,16);
13864 UInt cond = INSN(15,12);
13865 UInt nn = INSN(9,5);
13866 UInt op = INSN(4,4);
13867 UInt nzcv = INSN(3,0);
13868 vassert(ty < 4 && op <= 1);
13870 if (ty <= BITS2(0,1)) {
13871 /* -------- 00,0 FCCMP s_s -------- */
13872 /* -------- 00,1 FCCMPE s_s -------- */
13873 /* -------- 01,0 FCCMP d_d -------- */
13874 /* -------- 01,1 FCCMPE d_d -------- */
13876 /* FCCMPE generates Invalid Operation exn if either arg is any kind
13877 of NaN. FCCMP generates Invalid Operation exn if either arg is a
13878 signalling NaN. We ignore this detail here and produce the same
13879 IR for both.
13881 Bool isD = (ty & 1) == 1;
13882 Bool isCMPE = op == 1;
13883 IRType ity = isD ? Ity_F64 : Ity_F32;
13884 IRTemp argL = newTemp(ity);
13885 IRTemp argR = newTemp(ity);
13886 IRTemp irRes = newTemp(Ity_I32);
13887 assign(argL, getQRegLO(nn, ity));
13888 assign(argR, getQRegLO(mm, ity));
13889 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13890 mkexpr(argL), mkexpr(argR)));
13891 IRTemp condT = newTemp(Ity_I1);
13892 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
13893 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13895 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
13896 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
13898 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
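/* Per the architecture: if the condition holds, the flags come from
   the comparison just done; otherwise they are set directly from the
   immediate nzcv field. */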
13900 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13901 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
13902 mkexpr(nzcvT_28x0), nzcvF_28x0));
13903 setFlags_COPY(nzcv_28x0);
13904 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
13905 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
13906 return True;
13909 return False;
13910 # undef INSN
13914 static
13915 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
13917 /* 31 23 21 20 15 11 9 4
13918 000 11110 ty 1 m cond 11 n d
13919 The first 3 bits are really "M 0 S", but M and S are always zero.
13920 Decode fields: ty
13922 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13923 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
13924 || INSN(11,10) != BITS2(1,1)) {
13925 return False;
13927 UInt ty = INSN(23,22);
13928 UInt mm = INSN(20,16);
13929 UInt cond = INSN(15,12);
13930 UInt nn = INSN(9,5);
13931 UInt dd = INSN(4,0);
13932 if (ty <= X01) {
13933 /* -------- 00: FCSEL s_s -------- */
13934 /* -------- 01: FCSEL d_d -------- */
13935 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13936 IRTemp srcT = newTemp(ity);
13937 IRTemp srcF = newTemp(ity);
13938 IRTemp res = newTemp(ity);
13939 assign(srcT, getQRegLO(nn, ity));
13940 assign(srcF, getQRegLO(mm, ity));
13941 assign(res, IRExpr_ITE(
13942 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
13943 mkexpr(srcT), mkexpr(srcF)));
13944 putQReg128(dd, mkV128(0x0000));
13945 putQRegLO(dd, mkexpr(res));
13946 DIP("fcsel %s, %s, %s, %s\n",
13947 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
13948 nameCC(cond));
13949 return True;
13951 return False;
13952 # undef INSN
13956 static
13957 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
13959 /* 31 28 23 21 20 14 9 4
13960 000 11110 ty 1 opcode 10000 n d
13961 The first 3 bits are really "M 0 S", but M and S are always zero.
13962 Decode fields: ty,opcode
13964 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13965 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13966 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
13967 return False;
13969 UInt ty = INSN(23,22);
13970 UInt opcode = INSN(20,15);
13971 UInt nn = INSN(9,5);
13972 UInt dd = INSN(4,0);
13974 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
13975 /* -------- 0x,000000: FMOV d_d, s_s -------- */
13976 /* -------- 0x,000001: FABS d_d, s_s -------- */
13977 /* -------- 0x,000010: FNEG d_d, s_s -------- */
13978 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
13979 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13980 IRTemp src = newTemp(ity);
13981 IRTemp res = newTemp(ity);
13982 const HChar* nm = "??";
13983 assign(src, getQRegLO(nn, ity));
13984 switch (opcode) {
13985 case BITS6(0,0,0,0,0,0):
13986 nm = "fmov"; assign(res, mkexpr(src)); break;
13987 case BITS6(0,0,0,0,0,1):
13988 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
13989 case BITS6(0,0,0,0,1,0):
13990 nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
13991 case BITS6(0,0,0,0,1,1):
13992 nm = "fsqrt";
13993 assign(res, binop(mkSQRTF(ity),
13994 mkexpr(mk_get_IR_rounding_mode()),
13995 mkexpr(src))); break;
13996 default:
13997 vassert(0);
13999 putQReg128(dd, mkV128(0x0000));
14000 putQRegLO(dd, mkexpr(res));
14001 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
14002 return True;
14005 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
14006 || opcode == BITS6(0,0,0,1,0,1)))
14007 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
14008 || opcode == BITS6(0,0,0,1,0,1)))
14009 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
14010 || opcode == BITS6(0,0,0,1,0,0)))) {
14011 /* -------- 11,000100: FCVT s_h -------- */
14012 /* -------- 11,000101: FCVT d_h -------- */
14013 /* -------- 00,000111: FCVT h_s -------- */
14014 /* -------- 00,000101: FCVT d_s -------- */
14015 /* -------- 01,000111: FCVT h_d -------- */
14016 /* -------- 01,000100: FCVT s_d -------- */
14017 /* 31 23 21 16 14 9 4
14018 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
14019 --------- 11 ----- 01 --------- FCVT Dd, Hn
14020 --------- 00 ----- 11 --------- FCVT Hd, Sn
14021 --------- 00 ----- 01 --------- FCVT Dd, Sn
14022 --------- 01 ----- 11 --------- FCVT Hd, Dn
14023 --------- 01 ----- 00 --------- FCVT Sd, Dn
14024 Rounding, when dst is smaller than src, is per the FPCR.
14026 UInt b2322 = ty;
14027 UInt b1615 = opcode & BITS2(1,1);
14028 switch ((b2322 << 2) | b1615) {
14029 case BITS4(0,0,0,1): // S -> D
14030 case BITS4(1,1,0,1): { // H -> D
14031 Bool srcIsH = b2322 == BITS2(1,1);
14032 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
14033 IRTemp res = newTemp(Ity_F64);
14034 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
14035 getQRegLO(nn, srcTy)));
14036 putQReg128(dd, mkV128(0x0000));
14037 putQRegLO(dd, mkexpr(res));
14038 DIP("fcvt %s, %s\n",
14039 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
14040 return True;
14042 case BITS4(0,1,0,0): // D -> S
14043 case BITS4(0,1,1,1): { // D -> H
14044 Bool dstIsH = b1615 == BITS2(1,1);
14045 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
14046 IRTemp res = newTemp(dstTy);
14047 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
14048 mkexpr(mk_get_IR_rounding_mode()),
14049 getQRegLO(nn, Ity_F64)));
14050 putQReg128(dd, mkV128(0x0000));
14051 putQRegLO(dd, mkexpr(res));
14052 DIP("fcvt %s, %s\n",
14053 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
14054 return True;
14056 case BITS4(0,0,1,1): // S -> H
14057 case BITS4(1,1,0,0): { // H -> S
14058 Bool toH = b1615 == BITS2(1,1);
14059 IRType srcTy = toH ? Ity_F32 : Ity_F16;
14060 IRType dstTy = toH ? Ity_F16 : Ity_F32;
14061 IRTemp res = newTemp(dstTy);
14062 if (toH) {
14063 assign(res, binop(Iop_F32toF16,
14064 mkexpr(mk_get_IR_rounding_mode()),
14065 getQRegLO(nn, srcTy)));
14067 } else {
14068 assign(res, unop(Iop_F16toF32,
14069 getQRegLO(nn, srcTy)));
14071 putQReg128(dd, mkV128(0x0000));
14072 putQRegLO(dd, mkexpr(res));
14073 DIP("fcvt %s, %s\n",
14074 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
14075 return True;
14077 default:
14078 break;
14080 /* else unhandled */
14081 return False;
14084 if (ty <= X01
14085 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
14086 && opcode != BITS6(0,0,1,1,0,1)) {
14087 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
14088 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
14089 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
14090 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
14091 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
14092 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
14093 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
14094 /* 31 23 21 17 14 9 4
14095 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
14097 x==0 => S-registers, x==1 => D-registers
14098 rm (17:15) encodings:
14099 111 per FPCR (FRINTI)
14100 001 +inf (FRINTP)
14101 010 -inf (FRINTM)
14102 011 zero (FRINTZ)
14103 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
14104 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
14105 110 per FPCR + "exact = TRUE" (FRINTX)
14106 101 unallocated
14108 Bool isD = (ty & 1) == 1;
14109 UInt rm = opcode & BITS6(0,0,0,1,1,1);
14110 IRType ity = isD ? Ity_F64 : Ity_F32;
14111 IRExpr* irrmE = NULL;
14112 UChar ch = '?';
14113 switch (rm) {
14114 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
14115 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
14116 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
14117 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
14118 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
14119 // I am unsure about the following, due to the "integral exact"
14120 // description in the manual. What does it mean? (frintx, that is)
14121 case BITS3(1,1,0):
14122 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
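// (FRINTX is architecturally like FRINTI except that it also raises
//  the Inexact exception when the result differs from the operand;
//  since FP exceptions aren't modelled here, the two come out the
//  same.)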
14123 case BITS3(1,1,1):
14124 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
14125 // The following is a kludge. There's no Irrm_ value to represent
14126 // this ("to nearest, with ties to even")
14127 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
14128 default: break;
14130 if (irrmE) {
14131 IRTemp src = newTemp(ity);
14132 IRTemp dst = newTemp(ity);
14133 assign(src, getQRegLO(nn, ity));
14134 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
14135 irrmE, mkexpr(src)));
14136 putQReg128(dd, mkV128(0x0000));
14137 putQRegLO(dd, mkexpr(dst));
14138 DIP("frint%c %s, %s\n",
14139 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
14140 return True;
14142 return False;
14145 return False;
14146 # undef INSN
14150 static
14151 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
14153 /* 31 28 23 21 20 15 11 9 4
14154 000 11110 ty 1 m opcode 10 n d
14155 The first 3 bits are really "M 0 S", but M and S are always zero.
14156 Decode fields: ty, opcode
14158 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14159 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14160 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
14161 return False;
14163 UInt ty = INSN(23,22);
14164 UInt mm = INSN(20,16);
14165 UInt opcode = INSN(15,12);
14166 UInt nn = INSN(9,5);
14167 UInt dd = INSN(4,0);
14169 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
14170 /* ------- 0x,0000: FMUL d_d, s_s ------- */
14171 /* ------- 0x,0001: FDIV d_d, s_s ------- */
14172 /* ------- 0x,0010: FADD d_d, s_s ------- */
14173 /* ------- 0x,0011: FSUB d_d, s_s ------- */
14174 /* ------- 0x,0100: FMAX d_d, s_s ------- */
14175 /* ------- 0x,0101: FMIN d_d, s_s ------- */
14176 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
14177 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
14178 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
14179 IROp iop = Iop_INVALID;
14180 const HChar* nm = "???";
14181 switch (opcode) {
14182 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
14183 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
14184 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
14185 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
14186 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
14187 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
14188 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
14189 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
14190 default: vassert(0);
14192 if (opcode <= BITS4(0,0,1,1)) {
14193 // This is really not good code. TODO: avoid width-changing
14194 IRTemp res = newTemp(ity);
14195 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
14196 getQRegLO(nn, ity), getQRegLO(mm, ity)));
14197 putQReg128(dd, mkV128(0));
14198 putQRegLO(dd, mkexpr(res));
14199 } else {
14200 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
14201 binop(iop, getQReg128(nn), getQReg128(mm))));
14203 DIP("%s %s, %s, %s\n",
14204 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
14205 return True;
14208 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
14209 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
14210 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
14211 IROp iop = mkMULF(ity);
14212 IROp iopn = mkNEGF(ity);
14213 const HChar* nm = "fnmul";
14214 IRExpr* resE = unop(iopn,
14215 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
14216 getQRegLO(nn, ity), getQRegLO(mm, ity)));
14217 IRTemp res = newTemp(ity);
14218 assign(res, resE);
14219 putQReg128(dd, mkV128(0));
14220 putQRegLO(dd, mkexpr(res));
14221 DIP("%s %s, %s, %s\n",
14222 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
14223 return True;
14226 return False;
14227 # undef INSN
14231 static
14232 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
14234 /* 31 28 23 21 20 15 14 9 4
14235 000 11111 ty o1 m o0 a n d
14236 The first 3 bits are really "M 0 S", but M and S are always zero.
14237 Decode fields: ty,o1,o0
14239 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14240 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
14241 return False;
14243 UInt ty = INSN(23,22);
14244 UInt bitO1 = INSN(21,21);
14245 UInt mm = INSN(20,16);
14246 UInt bitO0 = INSN(15,15);
14247 UInt aa = INSN(14,10);
14248 UInt nn = INSN(9,5);
14249 UInt dd = INSN(4,0);
14250 vassert(ty < 4);
14252 if (ty <= X01) {
14253 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
14254 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
14255 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
14256 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
14257 /* -------------------- F{N}M{ADD,SUB} -------------------- */
14258 /* 31 22 20 15 14 9 4 ix
14259 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
14260 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
14261 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
14262 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
14263 where Fx=Dx when sz=1, Fx=Sx when sz=0
14265 -----SPEC------ ----IMPL----
14266 fmadd a + n * m a + n * m
14267 fmsub a + (-n) * m a - n * m
14268 fnmadd (-a) + (-n) * m -(a + n * m)
14269 fnmsub (-a) + n * m -(a - n * m)
14271 Bool isD = (ty & 1) == 1;
14272 UInt ix = (bitO1 << 1) | bitO0;
14273 IRType ity = isD ? Ity_F64 : Ity_F32;
14274 IROp opADD = mkADDF(ity);
14275 IROp opSUB = mkSUBF(ity);
14276 IROp opMUL = mkMULF(ity);
14277 IROp opNEG = mkNEGF(ity);
14278 IRTemp res = newTemp(ity);
14279 IRExpr* eA = getQRegLO(aa, ity);
14280 IRExpr* eN = getQRegLO(nn, ity);
14281 IRExpr* eM = getQRegLO(mm, ity);
14282 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
14283 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
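/* Note eNxM is rounded separately before the add/sub, so this is not
   a true fused multiply-add and can differ from hardware in the last
   mantissa bit. */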
14284 switch (ix) {
14285 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
14286 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
14287 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
14288 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
14289 default: vassert(0);
14291 putQReg128(dd, mkV128(0x0000));
14292 putQRegLO(dd, mkexpr(res));
14293 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
14294 DIP("%s %s, %s, %s, %s\n",
14295 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
14296 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
14297 return True;
14300 return False;
14301 # undef INSN
14305 static
14306 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
14308 /* 31 28 23 21 20 12 9 4
14309 000 11110 ty 1 imm8 100 imm5 d
14310 The first 3 bits are really "M 0 S", but M and S are always zero.
14312 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14313 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
14314 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
14315 return False;
14317 UInt ty = INSN(23,22);
14318 UInt imm8 = INSN(20,13);
14319 UInt imm5 = INSN(9,5);
14320 UInt dd = INSN(4,0);
14322 /* ------- 00,00000: FMOV s_imm ------- */
14323 /* ------- 01,00000: FMOV d_imm ------- */
14324 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
14325 Bool isD = (ty & 1) == 1;
14326 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
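/* VFPExpandImm expands imm8 per the ARM ARM pseudocode: the result is
   +/- (16..31)/16 * 2^r with r in -3..4, so only 256 distinct FP
   constants are encodable this way. */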
14327 if (!isD) {
14328 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
14330 putQReg128(dd, mkV128(0));
14331 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
14332 DIP("fmov %s, #0x%llx\n",
14333 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
14334 return True;
14337 return False;
14338 # undef INSN
14342 static
14343 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14346 /* 31 30 29 28 23 21 20 18 15 9 4
14347 sf 0 0 11110 type 0 rmode opcode scale n d
14348 The first 3 bits are really "sf 0 S", but S is always zero.
14349 Decode fields: sf,type,rmode,opcode
14351 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14352 if (INSN(30,29) != BITS2(0,0)
14353 || INSN(28,24) != BITS5(1,1,1,1,0)
14354 || INSN(21,21) != 0) {
14355 return False;
14357 UInt bitSF = INSN(31,31);
14358 UInt ty = INSN(23,22); // type
14359 UInt rm = INSN(20,19); // rmode
14360 UInt op = INSN(18,16); // opcode
14361 UInt sc = INSN(15,10); // scale
14362 UInt nn = INSN(9,5);
14363 UInt dd = INSN(4,0);
14365 if (ty <= X01 && rm == X11
14366 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
14367 /* -------- (ix) sf ty rm opc -------- */
14368 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
14369 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
14370 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
14371 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
14373 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
14374 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
14375 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
14376 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
14377 Bool isI64 = bitSF == 1;
14378 Bool isF64 = (ty & 1) == 1;
14379 Bool isU = (op & 1) == 1;
14380 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14382 Int fbits = 64 - sc;
14383 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14385 Double scale = two_to_the_plus(fbits);
14386 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14387 : IRExpr_Const(IRConst_F32( (Float)scale ));
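/* So, for example, FCVTZS Wd, Sn, #4 is implemented here as
   truncate-toward-zero(Sn * 16.0): the operand is multiplied by
   2^fbits and the product converted to integer with Irrm_ZERO. */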
14388 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14390 const IROp ops[8]
14391 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
14392 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
14393 IRTemp irrm = newTemp(Ity_I32);
14394 assign(irrm, mkU32(Irrm_ZERO));
14396 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
14397 IRExpr* res = binop(ops[ix], mkexpr(irrm),
14398 triop(opMUL, mkexpr(irrm), src, scaleE));
14399 putIRegOrZR(isI64, dd, res);
14401 DIP("fcvtz%c %s, %s, #%d\n",
14402 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
14403 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
14404 return True;
14407 /* ------ sf,ty,rm,opc ------ */
14408 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
14409 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
14410 /* (ix) sf S 28 ty rm opc 15 9 4
14411 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
14412 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
14413 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
14414 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
14416 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
14417 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
14418 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
14419 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
14421 These are signed/unsigned conversion from integer registers to
14422 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
14423 scaled per |scale|.
14425 if (ty <= X01 && rm == X00
14426 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
14427 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
14428 Bool isI64 = bitSF == 1;
14429 Bool isF64 = (ty & 1) == 1;
14430 Bool isU = (op & 1) == 1;
14431 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14433 Int fbits = 64 - sc;
14434 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14436 Double scale = two_to_the_minus(fbits);
14437 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14438 : IRExpr_Const(IRConst_F32( (Float)scale ));
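/* For example SCVTF Sd, Wn, #8 becomes (float)Wn * (1/256.0): the
   integer is converted to FP (rounded per FPCR) and then scaled by
   2^-fbits. */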
14439 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14441 const IROp ops[8]
14442 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14443 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14444 IRExpr* src = getIRegOrZR(isI64, nn);
14445 IRExpr* res = (isF64 && !isI64)
14446 ? unop(ops[ix], src)
14447 : binop(ops[ix],
14448 mkexpr(mk_get_IR_rounding_mode()), src);
14449 putQReg128(dd, mkV128(0));
14450 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
14452 DIP("%ccvtf %s, %s, #%d\n",
14453 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14454 nameIRegOrZR(isI64, nn), fbits);
14455 return True;
14458 return False;
14459 # undef INSN
14463 static
14464 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14466 /* 31 30 29 28 23 21 20 18 15 9 4
14467 sf 0 0 11110 type 1 rmode opcode 000000 n d
14468 The first 3 bits are really "sf 0 S", but S is always zero.
14469 Decode fields: sf,type,rmode,opcode
14471 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14472 if (INSN(30,29) != BITS2(0,0)
14473 || INSN(28,24) != BITS5(1,1,1,1,0)
14474 || INSN(21,21) != 1
14475 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
14476 return False;
14478 UInt bitSF = INSN(31,31);
14479 UInt ty = INSN(23,22); // type
14480 UInt rm = INSN(20,19); // rmode
14481 UInt op = INSN(18,16); // opcode
14482 UInt nn = INSN(9,5);
14483 UInt dd = INSN(4,0);
14485 // op = 000, 001
14486 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
14487 /* 30 23 20 18 15 9 4
14488 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
14489 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
14490 ---------------- 01 -------------- FCVTP-------- (round to +inf)
14491 ---------------- 10 -------------- FCVTM-------- (round to -inf)
14492 ---------------- 11 -------------- FCVTZ-------- (round to zero)
14493 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
14494 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
14496 Rd is Xd when sf==1, Wd when sf==0
14497 Fn is Dn when x==1, Sn when x==0
14498 20:19 carry the rounding mode, using the same encoding as FPCR
14500 if (ty <= X01
14501 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
14502 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
14505 Bool isI64 = bitSF == 1;
14506 Bool isF64 = (ty & 1) == 1;
14507 Bool isU = (op & 1) == 1;
14508 /* Decide on the IR rounding mode to use. */
14509 IRRoundingMode irrm = 8; /*impossible*/
14510 HChar ch = '?';
14511 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
14512 switch (rm) {
14513 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
14514 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
14515 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
14516 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
14517 default: vassert(0);
14519 } else {
14520 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
14521 switch (rm) {
14522 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
14523 default: vassert(0);
14526 vassert(irrm != 8);
14527 /* Decide on the conversion primop, based on the source size,
14528 dest size and signedness (8 possibilities). Case coding:
14529 F32 ->s I32 0
14530 F32 ->u I32 1
14531 F32 ->s I64 2
14532 F32 ->u I64 3
14533 F64 ->s I32 4
14534 F64 ->u I32 5
14535 F64 ->s I64 6
14536 F64 ->u I64 7
14538 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
14539 vassert(ix < 8);
14540 const IROp iops[8]
14541 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
14542 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
14543 IROp iop = iops[ix];
14544 // A bit of ATCery: bounce all cases we haven't seen an example of.
14545 if (/* F32toI32S */
14546 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
14547 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
14548 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
14549 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
14550 /* F32toI32U */
14551 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
14552 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
14553 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
14554 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
14555 /* F32toI64S */
14556 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
14557 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
14558 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
14559 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
14560 /* F32toI64U */
14561 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
14562 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
14563 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
14564 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
14565 /* F64toI32S */
14566 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
14567 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
14568 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
14569 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
14570 /* F64toI32U */
14571 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
14572 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
14573 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
14574 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
14575 /* F64toI64S */
14576 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
14577 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
14578 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
14579 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
14580 /* F64toI64U */
14581 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
14582 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
14583 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
14584 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
14586 /* validated */
14587 } else {
14588 return False;
14590 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
14591 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
14592 IRTemp src = newTemp(srcTy);
14593 IRTemp dst = newTemp(dstTy);
14594 assign(src, getQRegLO(nn, srcTy));
14595 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
14596 putIRegOrZR(isI64, dd, mkexpr(dst));
14597 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
14598 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
14599 return True;
14602 // op = 010, 011
14603 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
14604 /* (ix) sf S 28 ty rm op 15 9 4
14605 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
14606 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
14607 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
14608 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
14610 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
14611 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
14612 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
14613 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
14615 These are signed/unsigned conversion from integer registers to
14616 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
14618 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
14619 Bool isI64 = bitSF == 1;
14620 Bool isF64 = (ty & 1) == 1;
14621 Bool isU = (op & 1) == 1;
14622 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14623 const IROp ops[8]
14624 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14625 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14626 IRExpr* src = getIRegOrZR(isI64, nn);
14627 IRExpr* res = (isF64 && !isI64)
14628 ? unop(ops[ix], src)
14629 : binop(ops[ix],
14630 mkexpr(mk_get_IR_rounding_mode()), src);
14631 putQReg128(dd, mkV128(0));
14632 putQRegLO(dd, res);
14633 DIP("%ccvtf %s, %s\n",
14634 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14635 nameIRegOrZR(isI64, nn));
14636 return True;
14639 // op = 110, 111
14640 /* -------- FMOV (general) -------- */
14641 /* case sf S ty rm op 15 9 4
14642 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
14643 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
14644 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
14646 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
14647 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
14648 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
14650 if (1) {
14651 UInt ix = 0; // case
14652 if (bitSF == 0) {
14653 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14654 ix = 1;
14655 else
14656 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14657 ix = 4;
14658 } else {
14659 vassert(bitSF == 1);
14660 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14661 ix = 2;
14662 else
14663 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14664 ix = 5;
14665 else
14666 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
14667 ix = 3;
14668 else
14669 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
14670 ix = 6;
14672 if (ix > 0) {
14673 switch (ix) {
14674 case 1:
14675 putQReg128(dd, mkV128(0));
14676 putQRegLO(dd, getIReg32orZR(nn));
14677 DIP("fmov s%u, w%u\n", dd, nn);
14678 break;
14679 case 2:
14680 putQReg128(dd, mkV128(0));
14681 putQRegLO(dd, getIReg64orZR(nn));
14682 DIP("fmov d%u, x%u\n", dd, nn);
14683 break;
14684 case 3:
14685 putQRegHI64(dd, getIReg64orZR(nn));
14686 DIP("fmov v%u.d[1], x%u\n", dd, nn);
14687 break;
14688 case 4:
14689 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
14690 DIP("fmov w%u, s%u\n", dd, nn);
14691 break;
14692 case 5:
14693 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
14694 DIP("fmov x%u, d%u\n", dd, nn);
14695 break;
14696 case 6:
14697 putIReg64orZR(dd, getQRegHI64(nn));
14698 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
14699 break;
14700 default:
14701 vassert(0);
14703 return True;
14705 /* undecodable; fall through */
14708 return False;
14709 # undef INSN
14713 static
14714 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
14716 Bool ok;
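/* Try each SIMD/FP decoder in turn.  Every handler begins by checking
   its own fixed opcode fields and returns False immediately on a
   mismatch, so an insn is normally claimed by at most one of them. */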
14717 ok = dis_AdvSIMD_EXT(dres, insn);
14718 if (UNLIKELY(ok)) return True;
14719 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
14720 if (UNLIKELY(ok)) return True;
14721 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
14722 if (UNLIKELY(ok)) return True;
14723 ok = dis_AdvSIMD_across_lanes(dres, insn);
14724 if (UNLIKELY(ok)) return True;
14725 ok = dis_AdvSIMD_copy(dres, insn);
14726 if (UNLIKELY(ok)) return True;
14727 ok = dis_AdvSIMD_modified_immediate(dres, insn);
14728 if (UNLIKELY(ok)) return True;
14729 ok = dis_AdvSIMD_scalar_copy(dres, insn);
14730 if (UNLIKELY(ok)) return True;
14731 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
14732 if (UNLIKELY(ok)) return True;
14733 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
14734 if (UNLIKELY(ok)) return True;
14735 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
14736 if (UNLIKELY(ok)) return True;
14737 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
14738 if (UNLIKELY(ok)) return True;
14739 ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn);
14740 if (UNLIKELY(ok)) return True;
14741 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
14742 if (UNLIKELY(ok)) return True;
14743 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
14744 if (UNLIKELY(ok)) return True;
14745 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
14746 if (UNLIKELY(ok)) return True;
14747 ok = dis_AdvSIMD_three_different(dres, insn);
14748 if (UNLIKELY(ok)) return True;
14749 ok = dis_AdvSIMD_three_same(dres, insn);
14750 if (UNLIKELY(ok)) return True;
14751 ok = dis_AdvSIMD_three_same_extra(dres, insn);
14752 if (UNLIKELY(ok)) return True;
14753 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
14754 if (UNLIKELY(ok)) return True;
14755 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
14756 if (UNLIKELY(ok)) return True;
14757 ok = dis_AdvSIMD_crypto_aes(dres, insn);
14758 if (UNLIKELY(ok)) return True;
14759 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
14760 if (UNLIKELY(ok)) return True;
14761 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
14762 if (UNLIKELY(ok)) return True;
14763 ok = dis_AdvSIMD_fp_compare(dres, insn);
14764 if (UNLIKELY(ok)) return True;
14765 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
14766 if (UNLIKELY(ok)) return True;
14767 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
14768 if (UNLIKELY(ok)) return True;
14769 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
14770 if (UNLIKELY(ok)) return True;
14771 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
14772 if (UNLIKELY(ok)) return True;
14773 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
14774 if (UNLIKELY(ok)) return True;
14775 ok = dis_AdvSIMD_fp_immediate(dres, insn);
14776 if (UNLIKELY(ok)) return True;
14777 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
14778 if (UNLIKELY(ok)) return True;
14779 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
14780 if (UNLIKELY(ok)) return True;
14781 return False;
14785 /*------------------------------------------------------------*/
14786 /*--- Disassemble a single ARM64 instruction ---*/
14787 /*------------------------------------------------------------*/
/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has a guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
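   /* Illustrative example of the macro (the value is hypothetical,
      chosen only to show the field extraction): for
      insn = 0x91000420, which is "add x0, x1, #1", INSN(28,25)
      evaluates to 0b1000 and INSN(4,0) to 0 (the Rd field).  The
      decoders below rely on exactly this kind of slicing. */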
//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->jk_StopHere = Ijk_INVALID;
   dres->hint        = Dis_HintNone;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));
   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
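      /* For reference, client code typically materialises this
         preamble with inline asm roughly along the lines of the
         sketch below.  This is an illustration only, based on the
         encodings listed above; the canonical definitions live in
         valgrind.h, not here:

            __asm__ volatile(
               "ror x12, x12, #3  ;  ror x12, x12, #13 \n\t"
               "ror x12, x12, #51 ;  ror x12, x12, #61 \n\t"
               "orr x10, x10, x10 \n\t"   // marker: X3 = client_request(X4)
               : : : "cc", "memory", "x12");
      */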
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IRop
            // we're injecting here can change; in that case the translation
            // has to be redone.  For ease of handling, we simply invalidate
            // all the time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
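   /* Worked example (illustrative only): insn = 0x91000420 is
      "add x0, x1, #1"; its INSN(28,25) is BITS4(1,0,0,0), so the
      switch below hands it to dis_ARM64_data_processing_immediate. */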
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn, abiinfo);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}

/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/
/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;
   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
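   /* The single unsigned comparison does cover both bounds: if the
      field is below 2, the subtraction wraps around to a huge UInt,
      which is certainly greater than 15, so the vassert fails. */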
   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
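      /* len is 20 only for the "special" sequences recognised in
         disInstr_ARM64_WRK: a 16-byte preamble followed by a 4-byte
         marker instruction. */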
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
                  = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
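         /* Render the undecoded instruction word as binary, grouped
            for readability: a space every 8 bits and a tick every 4,
            e.g. 1001'0001 0000'0000 0000'0100 0010'0000. */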
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}

/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/