/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                      guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2017 OpenWorks

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM and FMINNM are implemented the same as
     FMAX/FMIN.

     Also, the FP comparison "unordered" case is implemented as a
     normal FP comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns.  Are split into a multiply and
     an add, and so suffer double rounding and hence sometimes the
     least significant mantissa bit is incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA, FRINTN are kludged: they just round to nearest.  No special
     handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  "Round to odd" (which forces the result's LSB to 1
     when the conversion is inexact) is not implemented; this
     implementation just rounds to nearest.
*/
54 /* "Special" instructions.
56 This instruction decoder can decode four special instructions
57 which mean nothing natively (are no-ops as far as regs/mem are
58 concerned) but have meaning for supporting Valgrind. A special
59 instruction is flagged by a 16-byte preamble:
61 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
62 (ror x12, x12, #3; ror x12, x12, #13
63 ror x12, x12, #51; ror x12, x12, #61)
65 Following that, one of the following 3 are allowed
66 (standard interpretation in parentheses):
68 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
69 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
70 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
71 AA090129 (orr x9,x9,x9) IR injection
73 Any other bytes following the 16-byte preamble are illegal and
74 constitute a failure in instruction decoding. This all assumes
75 that the preamble will never occur except in specific code
76 fragments designed for Valgrind to catch.
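/* Illustrative sketch (not part of the decoder): a client emits the
   preamble and then one marker insn via inline assembly.  The macro
   name below is hypothetical; real clients get the authoritative
   definitions from valgrind.h.  Only the encodings listed above are
   definitive. */
#if 0
#define SPECIAL_INSTRUCTION_PREAMBLE                 \
   "ror x12, x12, #3  ; ror x12, x12, #13 \n\t"      \
   "ror x12, x12, #51 ; ror x12, x12, #61 \n\t"
/* e.g. to read guest_NRADDR into X3, emit the preamble followed by
   "orr x11, x11, x11" and collect the result from x3. */
#endif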
/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"
/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction. */

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;
/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}
/* Sign extend a N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   /* Shift the value up so bit N-1 lands in the sign position, then
      arithmetically shift it back down. */
   x <<= (64 - n);
   Long r = (Long)x;
   r >>= (64 - n);
   return (ULong)r;
}
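/* Worked example: sx_to_64(0x1F, 6) == 0x1F, since bit 5 is zero,
   whereas sx_to_64(0x20, 6) == 0xFFFFFFFFFFFFFFE0, since bit 5 is
   copied into bits 63:6. */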
//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }
#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)   \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)        \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
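/* Worked example: for insn = 0xD2800540 (movz x0, #42),
   SLICE_UInt(insn, 31, 24) == 0xD2 and SLICE_UInt(insn, 4, 0) == 0
   (the Rd field): bits [bMax:bMin] are shifted down and masked.
   Likewise BITS4(1,0,1,1) == 0xB. */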
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/
static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}
/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}
//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}
/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}
//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return binop(Iop_Or32,
//ZZ                 binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ                 binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }
/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}
static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}
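/* All the mkVec* helpers below follow this table-lookup pattern: the
   caller passes log2(lane size in bytes), so for example mkVecADD(0)
   == Iop_Add8x16 and mkVecADD(2) == Iop_Add32x4.  Entries that are
   Iop_INVALID mark lane sizes the ISA doesn't provide. */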
static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}
static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}
static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}
static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}
static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}
static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8,  Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8,  Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8,  Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8,  Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8,  Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8,  Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8,  Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8,  Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8,  Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}
static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}
/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
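/* Worked example: with ty == Ity_I32, mathROR(ty, t, 8) turns
   0x11223344 into 0x44112233, and mathREPLICATE(ty, t, 7) yields
   0xFFFFFFFF when bit 7 of t is 1, else 0: the selected bit is
   shifted up to the sign position and arithmetically shifted back
   down. */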
/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64(arm64)");
   }
}

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}
/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)

#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}
static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}
/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}
/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}
/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}
static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}
//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }
/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
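/* Worked example of the bit swap: FPCR rounding field 01 (ARM "to
   +infinity") becomes IR encoding 10 (Irrm_PosINF), and 10 (ARM "to
   -infinity") becomes 01 (Irrm_NegINF); 00 and 11 map to themselves. */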
/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("nameARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}
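/* For instance, for a conditional branch on EQ the decoder calls
   mk_arm64g_calculate_condition(ARM64CondEQ): that passes
   (ARM64CondEQ << 4) as the COND half of the (COND, OP) pair, and the
   helper returns 1 exactly when the Z flag implied by the current
   thunk is set. */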
/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }
/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}
/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}
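/* For example, decoding "adds w0, w1, w2" would call
   setFlags_ADD_SUB(False/*!is64*/, False/*!isSUB*/, argL, argR) with
   argL/argR holding the 32-bit operands; the thunk then records
   ARM64G_CC_OP_ADD32 plus both operands, and NZCV is computed lazily
   from them only when a later instruction actually reads the flags. */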
/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}
/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }

   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op)   ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64)  ));
}
/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}
/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}
//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }
/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
                       mkexpr(maskT))
                 )
           );
   return res;
}
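/* Worked example: math_SWAPHELPER(x, 0xFF00FF00FF00FF00ULL, 8) moves
   every high byte of a 16-bit pair down and every low byte up, i.e. it
   swaps adjacent bytes: 0x1122334455667788 becomes
   0x2211443366558877. */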
/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
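/* The bit swap is the classic divide-and-conquer bit reversal: swap
   adjacent bits, then adjacent 2-bit pairs, then adjacent nibbles, and
   finally reverse the byte order.  E.g. an input with only bit 0 set
   (0x1) produces an output with only bit 63 set
   (0x8000000000000000). */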
/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
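/* Illustrative trace (not used by the code): for srcTy == Ity_I8 and
   src == 0x00000000000000AB, the successive or-with-shifted-self steps
   give 0x000000000000ABAB (t16), then 0x00000000ABABABAB (t32), and
   finally 0xABABABABABABABAB (t64). */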
/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}
/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}
/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}
/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix    = newTemp(Ity_I64);
   IRTemp termL = newTemp(Ity_I64);
   IRTemp termR = newTemp(Ity_I64);
   IRTemp nzcv  = newTemp(Ity_I64);
   IRTemp irRes = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
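/* Worked instance of the above (not used by the code): ix takes the
   values 0 (GT), 1 (LT), 2 (EQ) and 3 (UN).  Evaluating
      termL = ((((ix ^ 1) << 62) - 1) >>u 61) + 1
   gives 2, 8, 6 and 4 respectively, and
      termR = ix & (ix >> 1) & 1
   gives 0, 0, 0 and 1, so termL - termR yields the required NZCV
   groups 0010 (GT), 1000 (LT), 0110 (EQ) and 0011 (UN). */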
/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
      res &= ((1ULL << width) - 1);
   return res;
}
static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}
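/* Illustrative (not used by the code): dbm_RepTo64(8, 0xC3) gives
   0xC3C3C3C3C3C3C3C3, and dbm_RepTo64(2, 0x1) gives
   0x5555555555555555. */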
static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;

   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
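/* Worked example (not used by the code): for the logical-immediate
   encoding N=0, immr=0b000000, imms=0b111100 with M=64, we get
   len = dbm_highestSetBit(0b0000011) = 1, so esize = 2, levels = 1,
   S = 0, R = 0 and elem_s = 0b01; replication then yields
   wmask = 0x5555555555555555 -- which is how AND/ORR/EOR (immediate)
   encode that constant. */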
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn, Bool sigill_diag)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */
   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res  = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res  = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }
   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }
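   /* Illustrative (not used by the code): with guest_PC_curr_instr ==
      0x400123 and simm == 1, ADR computes 0x400123 + 1 == 0x400124,
      whereas ADRP computes (0x400123 & ~0xFFFULL) + (1 << 12)
      == 0x401000. */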
   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
         op=00: AND  Rd|SP, Rn, #imm
         op=01: ORR  Rd|SP, Rn, #imm
         op=10: EOR  Rd|SP, Rn, #imm
         op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;
      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:
   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = ((UInt)0xFFFF) << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }
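   /* Illustrative (not used by the code): "movz x0, #0x1234, lsl 16"
      writes 0x12340000, and a following "movk x0, #0xABCD" (hw == 0)
      applies mask 0xFFFF, keeping the other lanes and producing
      0x1234ABCD, exactly per the and/or scheme above. */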
   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True;  extend = True;  nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm";  break;
         case BITS2(1,0):
            inZero = True;  extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                      || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:
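   /* Illustrative (not used by the code): "lsr x0, x1, #sh" is the
      alias UBFM Xd, Xn, #sh, #63.  There wmask is all ones, so |bot|
      is just ROR(Xn, sh); inZero/extend make |top| zero; and tmask
      keeps the low 64-sh bits, which is precisely Xn >>u sh. */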
   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
         valid = False;
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:
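   /* Illustrative (not used by the code): when Rn == Rm, EXTR is the
      ROR alias -- (Xn << (64-imm6)) | (Xn >>u imm6) rotates Xn right
      by imm6 bit positions. */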
   if (sigill_diag) {
      vex_printf("ARM64 front end: data_processing_immediate\n");
   }
   return False;
#  undef INSN
}
/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}
/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   }
   return t1;
}
static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn, Bool sigill_diag)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29,29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }
   /* ------------------- ADC/SBC(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op

      31 30 29 28    23 21 20 15     9  4
      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   */
   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
      UInt   bS    = INSN(29,29); /* set flags */
      UInt   rM    = INSN(20,16);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);

      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;

      IRTemp oldC = newTemp(ty);
      assign(oldC,
             is64 ? mk_arm64g_calculate_flag_c()
                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );

      IRTemp argL = newTemp(ty);
      assign(argL, getIRegOrZR(is64, rN));
      IRTemp argR = newTemp(ty);
      assign(argR, getIRegOrZR(is64, rM));

      IROp   op  = isSUB ? mkSUB(ty) : mkADD(ty);
      IRTemp res = newTemp(ty);
      if (isSUB) {
         IRExpr* one   = is64 ? mkU64(1) : mkU32(1);
         IROp    xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      binop(xorOp, mkexpr(oldC), one)));
      } else {
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      mkexpr(oldC)));
      }

      if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));

      if (bS) {
         setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
      }

      DIP("%s%s %s, %s, %s\n",
          bOP ? "sbc" : "adc", bS ? "s" : "",
          nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
          nameIRegOrZR(is64, rM));
      return True;
   }
   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall though */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }
   /* -------------------- {U,S}MULH -------------------- */
   /* 31       23 22 20 15     9 4
      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }
   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30           20 15 14 9 4
      sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra   d = a+m*n
      sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra   d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }
   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28        20 15   11 9  4
      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }
   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28        20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

        000   Xm & 0xFF           UXTB
        001   Xm & 0xFFFF         UXTH
        010   Xm & (2^32)-1       UXTW
        011   Xm                  UXTX

        100   Xm sx from bit 7    SXTB
        101   Xm sx from bit 15   SXTH
        110   Xm sx from bit 31   SXTW
        111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }
   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31 29         20   15   11 9  3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }
   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31 29         20 15   11 9  3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }
   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20    15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }
   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp srcZ  = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      /* Get the argument, widened out to 64 bit */
      if (is64) {
         assign(src, getIReg64orZR(nn));
      } else {
         assign(src, binop(Iop_Shl64,
                           unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
      }
      /* If this is CLS, mash the arg around accordingly */
      if (isCLS) {
         IRExpr* one = mkU8(1);
         assign(srcZ,
                binop(Iop_Xor64,
                      binop(Iop_Shl64, mkexpr(src), one),
                      binop(Iop_Shl64,
                            binop(Iop_Shr64, mkexpr(src), one), one)));
      } else {
         assign(srcZ, mkexpr(src));
      }
      /* And compute CLZ. */
      if (is64) {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 63 : 64),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg64orZR(dd, mkexpr(dst));
      } else {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 31 : 32),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
      }
      DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
      return True;
   }
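   /* Illustrative note (not used by the code): in the CLS mashing
      above, bit i of srcZ (for i >= 1) is src[i] ^ src[i-1], i.e. set
      exactly where adjacent bits of src differ.  Hence Clz64(srcZ)
      counts how many leading bits of src match the sign bit, not
      counting the sign bit itself -- which is what CLS returns (and
      63 for an all-equal value, via the ITE). */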
   /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
   /*    30 28        20 15   11 9  4
      sf 00 1101 0110 m  0010 00 n  d   LSLV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n  d   LSRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n  d   ASRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 11 n  d   RORV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I64);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
                                    mkU64(is64 ? 63 : 31)));
      if (op < 3) {
         // LSLV, LSRV, ASRV
         switch (op) {
            case BITS2(0,0): iop = mkSHL(ty); break;
            case BITS2(0,1): iop = mkSHR(ty); break;
            case BITS2(1,0): iop = mkSAR(ty); break;
            default: vassert(0);
         }
         assign(res, binop(iop, mkexpr(srcL),
                                unop(Iop_64to8, mkexpr(srcR))));
      } else {
         // RORV
         IROp opSHL = mkSHL(ty);
         IROp opSHR = mkSHR(ty);
         IROp opOR  = mkOR(ty);
         IRExpr* width = mkU64(is64 ? 64: 32);
         assign(
            res,
            IRExpr_ITE(
               binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
               mkexpr(srcL),
               binop(opOR,
                     binop(opSHL,
                           mkexpr(srcL),
                           unop(Iop_64to8, binop(Iop_Sub64, width,
                                                 mkexpr(srcR)))),
                     binop(opSHR,
                           mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
         ));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 4);
      const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }
   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }
   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31        23  20 15 14 9 4
      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
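   /* Illustrative (not used by the code): "umull x0, w1, w2" is the
      alias UMADDL Xd, Wn, Wm, XZR -- with aa == 31 the addend reads as
      zero, so the result is just the widening unsigned 32x32->64
      product. */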
   /* -------------------- CRC32/CRC32C -------------------- */
   /* 31 30           20 15   11 9 4
      sf 00 1101 0110 m  0100 sz n d   CRC32<sz>  Wd, Wn, Wm|Xm
      sf 00 1101 0110 m  0101 sz n d   CRC32C<sz> Wd, Wn, Wm|Xm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,13) == BITS3(0,1,0)) {
      UInt bitSF = INSN(31,31);
      UInt mm    = INSN(20,16);
      UInt bitC  = INSN(12,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      vassert(sz >= 0 && sz <= 3);
      if ((bitSF == 0 && sz <= BITS2(1,0))
          || (bitSF == 1 && sz == BITS2(1,1))) {
         UInt ix = (bitC == 1 ? 4 : 0) | sz;
         void* helpers[8]
            = { &arm64g_calc_crc32b,  &arm64g_calc_crc32h,
                &arm64g_calc_crc32w,  &arm64g_calc_crc32x,
                &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
                &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
         const HChar* hNames[8]
            = { "arm64g_calc_crc32b",  "arm64g_calc_crc32h",
                "arm64g_calc_crc32w",  "arm64g_calc_crc32x",
                "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
                "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
         const HChar* iNames[8]
            = { "crc32b",  "crc32h",  "crc32w",  "crc32x",
                "crc32cb", "crc32ch", "crc32cw", "crc32cx" };

         IRTemp srcN = newTemp(Ity_I64);
         assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));

         IRTemp  srcM = newTemp(Ity_I64);
         IRExpr* at64 = getIReg64orZR(mm);
         switch (sz) {
            case BITS2(0,0):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
            case BITS2(0,1):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
            case BITS2(1,0):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
            case BITS2(1,1):
               assign(srcM, at64); break;
            default:
               vassert(0);
         }

         vassert(ix >= 0 && ix <= 7);

         putIReg64orZR(
            dd,
            unop(Iop_32Uto64,
                 unop(Iop_64to32,
                      mkIRExprCCall(Ity_I64, 0/*regparm*/,
                                    hNames[ix], helpers[ix],
                                    mkIRExprVec_2(mkexpr(srcN),
                                                  mkexpr(srcM))))));

         DIP("%s %s, %s, %s\n", iNames[ix],
             nameIReg32orZR(dd),
             nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
         return True;
      }
      /* fall through */
   }

   if (sigill_diag) {
      vex_printf("ARM64 front end: data_processing_register\n");
   }
   return False;
#  undef INSN
}
/*------------------------------------------------------------*/
/*--- Math helpers for vector interleave/deinterleave      ---*/
/*------------------------------------------------------------*/

#define EX(_tmp) \
           mkexpr(_tmp)
#define SL(_hi128,_lo128,_nbytes) \
           ( (_nbytes) == 0 \
              ? (_lo128) \
              : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
#define ROR(_v128,_nbytes) \
           SL((_v128),(_v128),(_nbytes))
#define ROL(_v128,_nbytes) \
           SL((_v128),(_v128),16-(_nbytes))
#define SHR(_v128,_nbytes) \
           binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
#define SHL(_v128,_nbytes) \
           binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
#define ILO64x2(_argL,_argR) \
           binop(Iop_InterleaveLO64x2,(_argL),(_argR))
#define IHI64x2(_argL,_argR) \
           binop(Iop_InterleaveHI64x2,(_argL),(_argR))
#define ILO32x4(_argL,_argR) \
           binop(Iop_InterleaveLO32x4,(_argL),(_argR))
#define IHI32x4(_argL,_argR) \
           binop(Iop_InterleaveHI32x4,(_argL),(_argR))
#define ILO16x8(_argL,_argR) \
           binop(Iop_InterleaveLO16x8,(_argL),(_argR))
#define IHI16x8(_argL,_argR) \
           binop(Iop_InterleaveHI16x8,(_argL),(_argR))
#define ILO8x16(_argL,_argR) \
           binop(Iop_InterleaveLO8x16,(_argL),(_argR))
#define IHI8x16(_argL,_argR) \
           binop(Iop_InterleaveHI8x16,(_argL),(_argR))
#define CEV32x4(_argL,_argR) \
           binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
#define COD32x4(_argL,_argR) \
           binop(Iop_CatOddLanes32x4,(_argL),(_argR))
#define COD16x8(_argL,_argR) \
           binop(Iop_CatOddLanes16x8,(_argL),(_argR))
#define COD8x16(_argL,_argR) \
           binop(Iop_CatOddLanes8x16,(_argL),(_argR))
#define CEV8x16(_argL,_argR) \
           binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
#define AND(_arg1,_arg2) \
           binop(Iop_AndV128,(_arg1),(_arg2))
#define OR2(_arg1,_arg2) \
           binop(Iop_OrV128,(_arg1),(_arg2))
#define OR3(_arg1,_arg2,_arg3) \
           binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
#define OR4(_arg1,_arg2,_arg3,_arg4) \
           binop(Iop_OrV128, \
                 binop(Iop_OrV128,(_arg1),(_arg2)), \
                 binop(Iop_OrV128,(_arg3),(_arg4)))
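/* Illustrative (not used by the code): SL(hi,lo,n) yields bytes
   n+15 .. n of the 32-byte concatenation hi:lo.  Hence ROR(v,8) swaps
   the two 64-bit halves of v, and ROL(v,4) rotates v left by four
   byte positions. */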
/* Do interleaving for 1 128 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}
/* Do interleaving for 2 128 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // u1 == B1 B0, u0 == A1 A0
      // i1 == B1 A1, i0 == B0 A0
      assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // u1 == B{7..0}, u0 == A{7..0}
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // u1 == B{f..0}, u0 == A{f..0}
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
      return;
   }
   vassert(0);
}
/* Do interleaving for 3 128 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_128(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      assign(*i2, IHI64x2( EX(u2), EX(u1) ));
      assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
      assign(*i0, ILO64x2( EX(u1), EX(u0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1100 = newTempV128();
      IRTemp c0011 = newTempV128();
      IRTemp c0110 = newTempV128();
      assign(c1100, mkV128(0xFF00));
      assign(c0011, mkV128(0x00FF));
      assign(c0110, mkV128(0x0FF0));
      // First interleave them at 64x2 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
                       AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
      assign(*i1, OR3( SHL(EX(p2),12),
                       AND(EX(p1),EX(c0110)),
                       SHR(EX(p0),12) ));
      assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
                       AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1000 = newTempV128();
      IRTemp c0100 = newTempV128();
      IRTemp c0010 = newTempV128();
      IRTemp c0001 = newTempV128();
      assign(c1000, mkV128(0xF000));
      assign(c0100, mkV128(0x0F00));
      assign(c0010, mkV128(0x00F0));
      assign(c0001, mkV128(0x000F));
      // First interleave them at 32x4 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2,
             OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
                  AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
             ));
      assign(*i1,
             OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
                  AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
                  AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
             ));
      assign(*i0,
             OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
                  AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
             ));
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  It doesn't seem worth the hassle of first doing a
      // 16x8 interleave, so just generate all 24 partial results
      // directly :-(
      // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
      // i2 == Cf Bf Af Ce .. Bb Ab Ca
      // i1 == Ba Aa C9 B9 .. A6 C5 B5
      // i0 == A5 C4 B4 A4 .. C0 B0 A0

      IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
      IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
      IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
      IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
      IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
      IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
      IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
      IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
      IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();

      // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
      // of the form 14 bytes junk : CC[0xF] : BB[0xA]
      //
#     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
         IRTemp t_##_tempName = newTempV128(); \
         assign(t_##_tempName, \
                ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
                         ROR(EX(_srcVec2),(_srcShift2)) ) )

      // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
      IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;

      // The slicing and reassembly are done as interleavedly as possible,
      // so as to minimise the demand for registers in the back end, which
      // was observed to be a problem in testing.

      XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
      XXXX(AfCe, AA, 0xf, CC, 0xe);
      assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));

      XXXX(BeAe, BB, 0xe, AA, 0xe);
      XXXX(CdBd, CC, 0xd, BB, 0xd);
      assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
      assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));

      XXXX(AdCc, AA, 0xd, CC, 0xc);
      XXXX(BcAc, BB, 0xc, AA, 0xc);
      assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));

      XXXX(CbBb, CC, 0xb, BB, 0xb);
      XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
      assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
      assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
      assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));

      XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
      XXXX(C9B9, CC, 0x9, BB, 0x9);
      assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));

      XXXX(A9C8, AA, 0x9, CC, 0x8);
      XXXX(B8A8, BB, 0x8, AA, 0x8);
      assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
      assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));

      XXXX(C7B7, CC, 0x7, BB, 0x7);
      XXXX(A7C6, AA, 0x7, CC, 0x6);
      assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));

      XXXX(B6A6, BB, 0x6, AA, 0x6);
      XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
      assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
      assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
      assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));

      XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
      XXXX(B4A4, BB, 0x4, AA, 0x4);
      assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));

      XXXX(C3B3, CC, 0x3, BB, 0x3);
      XXXX(A3C2, AA, 0x3, CC, 0x2);
      assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
      assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));

      XXXX(B2A2, BB, 0x2, AA, 0x2);
      XXXX(C1B1, CC, 0x1, BB, 0x1);
      assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));

      XXXX(A1C0, AA, 0x1, CC, 0x0);
      XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
      assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
      assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
      assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));

#     undef XXXX
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}
/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
      return;
   }
   vassert(0);
}
/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_128( 
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
      assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
      assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      IRTemp t_a1c0b0a0 = newTempV128();
      IRTemp t_a2c1b1a1 = newTempV128();
      IRTemp t_a3c2b2a2 = newTempV128();
      IRTemp t_a0c3b3a3 = newTempV128();
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      // Compute some intermediate values.
      assign(t_a1c0b0a0, EX(i0));
      assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
      assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
      assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
      // First deinterleave into lane-pairs
      assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
      assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
                         IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
      assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
      // Then deinterleave at 64x2 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
      return;
   }
   if (laneSzBlg2 == 1) {
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
      s0 = s1 = s2 = s3
         = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&p0, &p1, &p2, &c00111111);

      // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
      // s2 == b6a6 c5b5a5 c4b4a4
      // s3 == b0a0 c7b7a7 c6b6a6
      assign(s0, EX(i0));
      assign(s1, SL(EX(i1),EX(i0),6*2));
      assign(s2, SL(EX(i2),EX(i1),4*2));
      assign(s3, SL(EX(i0),EX(i2),2*2));

      // t0 == 0 0 c1c0 b1b0 a1a0
      // t1 == 0 0 c3c2 b3b2 a3a2
      // t2 == 0 0 c5c4 b5b4 a5a4
      // t3 == 0 0 c7c6 b7b6 a7a6
      assign(c00111111, mkV128(0x0FFF));
      assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
      assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
      assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
      assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));

      assign(p0, OR2(EX(t0),          SHL(EX(t1),6*2)));
      assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
      assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));

      // Then deinterleave at 32x4 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16.  This is the same scheme as for 16x8, with twice the
      // number of intermediate values.
      //
      // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
      // i1 ==  BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 ==   A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                      SHL(EX(t3),2),  SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128( 
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */
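/* Illustrative walk-through (not part of the original source), for a
   half-width ST2 of 32-bit lanes with inputs u0 == ?:?:A1:A0 and
   u1 == ?:?:B1:B0.  The doubler (InterleaveLO32x4) clones each
   low-half lane, giving du0 == A1:A1:A0:A0 and du1 == B1:B1:B0:B0.
   A full-width interleave at the 64-bit size then yields
   di0 == B0B0:A0A0, and the halver (CatEvenLanes32x4) drops the
   duplicates, so the low 64 bits of the result are B0:A0 -- the
   required interleaved memory image.  The upper 64 bits are junk, as
   warned above. */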
/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}
/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}
/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}
/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64( 
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}
/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64( 
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}
/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}
/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}
/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64( 
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}
/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64( 
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}
/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12].

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
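/* Decode example (illustrative, not from the original source): with
   opt:S == 011:1 and szLg2 == 3 the generated EA is
   Xn|SP + (Xm << 3), i.e. the 64-bit index register scaled by the
   8-byte transfer size; with opt:S == 110:0 it is
   Xn|SP + 32Sto64(Wm), a sign-extended but unscaled 32-bit index. */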
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   if (0 /*really, sigill_diag, but that causes too much plumbing*/) {
      vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   }
   return IRTemp_INVALID;
}
/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}
/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}
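/* Usage sketch (illustrative; it mirrors the LDR/STR (uimm12) case
   below): all integer loads and stores are funnelled through a single
   Ity_I64 value, e.g.

      if (isLD)
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      else
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));

   so the sub-64-bit transfer sizes are handled once, by this pair of
   helpers, rather than by per-size cases at every decode site. */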
/* Generate a "standard 7" name, from bitQ and size.  But also
   allow ".1d" since that's occasionally useful. */
static
const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
{
   vassert(bitQ <= 1 && size <= 3);
   const HChar* nms[8]
      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   UInt ix = (bitQ << 2) | size;
   vassert(ix < 8);
   return nms[ix];
}
static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexAbiInfo* abiinfo, Bool sigill_diag)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21    9  4
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp ta    = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (nn == 31) { /* FIXME generate stack alignment check */ }
      vassert(szLg2 < 4);
      if (isLD) {
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      } else {
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
      }
      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
      DIP("%s %s, [%s, #%u]\n",
          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
          nameIReg64orSP(nn), offs);
      return True;
   }
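   /* Worked example (illustrative, not from the original source):
      "ldr x1, [x2, #24]" has sz == 11, so szLg2 == 3 and szB == 8;
      imm12 == 3 gives offs == 3 * 8 == 24, hence ta == X2 + 24.  The
      same imm12 with sz == 10 (szB == 4) would address X2 + 12. */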
   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   /*
      31 29  26      20   11 9  4
      (at-Rn-then-Rn=EA)
      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!

      (at-Rn)
      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]

      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
      load case this is because it would create two competing values
      for Rt.  In the store case the reason is unclear, but the spec
      disallows it anyway.

      Stores are narrowing, loads are unsigned widening.  sz encodes
      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   */
   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
       == BITS9(1,1,1, 0,0,0,0,0, 0)) {
      UInt szLg2  = INSN(31,30);
      UInt szB    = 1 << szLg2;
      Bool isLoad = INSN(22,22) == 1;
      UInt imm9   = INSN(20,12);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);
      Bool wBack  = INSN(10,10) == 1;
      UInt how    = INSN(11,10);
      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         Long simm9 = (Long)sx_to_64(imm9, 9);
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (how) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               str x30, [sp,#-16]!
               str w1,  [sp,#-32]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -16/-32 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
            = wBack && simm9 < 0 && (szB == 8 || szB == 4)
              && how == BITS2(1,1) && nn == 31 && !isLoad;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLoad) {
            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
         } else {
            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
         const HChar* fmt_str = NULL;
         switch (how) {
            case BITS2(0,1):
               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
                      nameIRegOrZR(szB == 8, tt),
                      nameIReg64orSP(nn), simm9);
         return True;
      }
   }
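   /* Worked example (illustrative, not from the original source):
      "str x0, [sp, #-16]!" has how == 11 and imm9 == 0x1F0, so
      simm9 == sx_to_64(0x1F0, 9) == -16.  Since wBack holds,
      simm9 < 0, szB == 8, nn == 31 and this is a store, earlyWBack is
      taken: SP is dropped to SP-16 before the store, for the
      Memcheck/stack-extension reasons given above. */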
   /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      x==0    => 32 bit transfers, and zero extended loads
      x==1    => 64 bit transfers
      simm7 is scaled by the (single-register) transfer size

      (at-Rn-then-Rn=EA)
      x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm

      (at-EA-then-Rn=EA)
      x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!

      (at-Rn)
      x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
   */
   UInt insn_30_23 = INSN(30,23);
   if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
      UInt bL     = INSN(22,22);
      UInt bX     = INSN(31,31);
      UInt bWBack = INSN(23,23);
      UInt rT1    = INSN(4,0);
      UInt rN     = INSN(9,5);
      UInt rT2    = INSN(14,10);
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
          || (bL && rT1 == rT2)) {
         /* undecodable; fall through */
      } else {
         if (rN == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(rN));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = (bX ? 8 : 4) * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
               stp x29, x30, [sp,#-112]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -112 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
            = bWBack && simm7 < 0
              && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;

         if (bWBack && earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         /**/ if (bL == 1 && bX == 1) {
            // 64 bit load
            putIReg64orZR(rT1, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg64orZR(rT2, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
         } else if (bL == 1 && bX == 0) {
            // 32 bit load
            putIReg32orZR(rT1, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg32orZR(rT2, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
         } else if (bL == 0 && bX == 1) {
            // 64 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg64orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
                    getIReg64orZR(rT2));
         } else {
            vassert(bL == 0 && bX == 0);
            // 32 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg32orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
                    getIReg32orZR(rT2));
         }

         if (bWBack && !earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, bL == 0 ? "st" : "ld",
                      nameIRegOrZR(bX == 1, rT1),
                      nameIRegOrZR(bX == 1, rT2),
                      nameIReg64orSP(rN), simm7);
         return True;
      }
   }
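   /* Worked example (illustrative, not from the original source):
      "stp x29, x30, [sp, #-112]!" encodes bX == 1 and imm7 == -14;
      scaling by the 8-byte register size gives simm7 == -112, so the
      two stores go to SP-112 and SP-104.  This is also the shape that
      triggers the early writeback above. */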
   /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
   /* Does 32 bit transfers which are sign extended to 64 bits.
      simm7 is scaled by the (single-register) transfer size

      (at-Rn-then-Rn=EA)
      01 101 0001 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP], #imm

      (at-EA-then-Rn=EA)
      01 101 0011 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP, #imm]!

      (at-Rn)
      01 101 0010 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP, #imm]
   */
   UInt insn_31_22 = INSN(31,22);
   if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
       || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
       || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
      UInt bWBack = INSN(23,23);
      UInt rT1    = INSN(4,0);
      UInt rN     = INSN(9,5);
      UInt rT2    = INSN(14,10);
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
          || (rT1 == rT2)) {
         /* undecodable; fall through */
      } else {
         if (rN == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(rN));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = 4 * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         // 32 bit load, sign extended to 64 bits
         putIReg64orZR(rT1, unop(Iop_32Sto64,
                                 loadLE(Ity_I32, binop(Iop_Add64,
                                                       mkexpr(tTA),
                                                       mkU64(0)))));
         putIReg64orZR(rT2, unop(Iop_32Sto64,
                                 loadLE(Ity_I32, binop(Iop_Add64,
                                                       mkexpr(tTA),
                                                       mkU64(4)))));
         if (bWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, nameIReg64orZR(rT1),
                      nameIReg64orZR(rT2),
                      nameIReg64orSP(rN), simm7);
         return True;
      }
   }
   /* ---------------- LDR (literal, int reg) ---------------- */
   /* 31 29      23    4
      00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
      01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
      10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
      11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
      Just handles the first two cases for now.
   */
   if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
      UInt  imm19 = INSN(23,5);
      UInt  rT    = INSN(4,0);
      UInt  bX    = INSN(30,30);
      ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      if (bX) {
         putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
      } else {
         putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
      }
      DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
      return True;
   }
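   /* Worked example (illustrative, not from the original source): for
      an "ldr x5, <label>" whose target lies 1024 bytes past the
      instruction, imm19 == 256, so
      ea == guest_PC_curr_instr + sx_to_64(256 << 2, 21) == PC + 1024,
      and the access becomes a 64-bit loadLE from that constant
      address. */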
   /* -------------- {LD,ST}R (integer register) --------------- */
   /* 31 29        20 15     12 11 9  4
      |  |         |  |      |  |  |  |
      11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]

      11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         switch (szLg2) {
            case 3: /* 64 bit */
               if (isLD) {
                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg64orZR(tt));
                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
               }
               break;
            case 2: /* 32 bit */
               if (isLD) {
                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg32orZR(tt));
                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 1: /* 16 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }
   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szLg2) {
            case 2: /* 32 bit */
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1: /* 16 bit */
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 0: /* 8 bit */
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }
   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else vassert(0);
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }
   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else vassert(0);
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
         return True;
      }
      /* else fall through */
   }
   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  26   22 21   14 9 4

      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA, with nontemporal hint)

      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
                                    (at-Rn-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
                                    (at-EA-then-Rn=EA)
   */
   if (INSN(29,25) == BITS5(1,0,1,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
            = wBack && simm7 < 0
              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }
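   /* Worked example (illustrative, not from the original source):
      "ldp q0, q1, [sp], #32" has szSlg2 == 10, so szB == 4 << 2 == 16
      and ty == Ity_V128; the loads read [SP+0] and [SP+16], and the
      post-index writeback adds imm7 == 2 scaled by szB, i.e. SP += 32. */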
   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 > 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1: /* 16 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4: /* 128 bit */
            if (isLD) {
               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQReg128(tt));
               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
            }
            break;
         default:
            vassert(0);
      }
      return True;
   }
  after_LDR_STR_vector_register:
   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S 10 Rn Rt  LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S 10 Rn Rt  LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S 10 Rn Rt  LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S 10 Rn Rt  LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S 10 Rn Rt  LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
  after_LDRS_integer_register:
   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29      23 21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }
   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   /* These are the Post-Index and Pre-Index variants.

      31 29      23 21   11 9 4
      (at-Rn-then-Rn=EA)
      00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
      01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
      10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
      11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
      00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm

      (at-EA-then-Rn=EA)
      00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
      01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
      10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
      11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
      00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!

      Stores are the same except with bit 22 set to 0.
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      Bool   atRN  = INSN(11,11) == 0;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp tRN   = newTemp(Ity_I64);
      IRTemp tEA   = newTemp(Ity_I64);
      IRTemp tTA   = IRTemp_INVALID;
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9 = sx_to_64(imm9, 9);
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      tTA = atRN ? tRN : tEA;

      /* Do early writeback for the cases typified by
            str d10, [sp, #-128]!
         for the same reasons as described in a similar comment in the
         "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
      */
      Bool earlyWBack
         = !atRN && !isLD && (ty == Ity_F64 || ty == Ity_V128)
           && nn == 31 && ((Long)simm9) < 0;

      if (earlyWBack)
         putIReg64orSP(nn, mkexpr(tEA));

      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }

      if (!earlyWBack)
         putIReg64orSP(nn, mkexpr(tEA));

      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }
   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   /* 31 29      23 21   11 9 4
      00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
      01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
      10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
      11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
      00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]

      00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
      01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
      10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
      11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
      00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      ULong  simm9 = sx_to_64(imm9, 9);
      IRTemp tEA   = newTemp(Ity_I64);
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }
   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   /* 31 29      23    4
      00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
      01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
      10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
   */
   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
      UInt   szB   = 4 << INSN(31,30);
      UInt   imm19 = INSN(23,5);
      UInt   tt    = INSN(4,0);
      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      IRType ty    = preferredVectorSubTypeFromSize(szB);
      putQReg128(tt, mkV128(0x0000));
      putQRegLO(tt, loadLE(ty, mkU64(ea)));
      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
      return True;
   }
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg  ------ */
   /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
   /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
   /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T},      [Xn|SP]
      0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T},      [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt opc   = INSN(15,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      Bool isQ   = bitQ == 1;
      Bool is1d  = sz == BITS2(1,1) && !isQ;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,0,0): nRegs = 4; break;
         case BITS4(0,1,0,0): nRegs = 3; break;
         case BITS4(1,0,0,0): nRegs = 2; break;
         case BITS4(0,1,1,1): nRegs = 1; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs == 1                             /* .1d is allowed */
          || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
         u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
            case 1: u0 = newTempV128(); i0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
               case 1: assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
                          (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
                       break;
               case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
                          (&i0, &i1, &i2, sz, u0, u1, u2);
                       break;
               case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
                          (&i0, &i1, sz, u0, u1);
                       break;
               case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
                          (&i0, sz, u0);
                       break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(i3)) );
                        /* fallthru */
               case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(i2)) );
                        /* fallthru */
               case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(i1)) );
                        /* fallthru */
               case 1:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(i0)) );
                        break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(i3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(i2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(i1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  /* fallthru */
               case 1:
                  assign(i0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
                          (&u0, &u1, &u2, &u3, sz, i0, i1, i2, i3);
                       break;
               case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
                          (&u0, &u1, &u2, sz, i0, i1, i2);
                       break;
               case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
                          (&u0, &u1, sz, i0, i1);
                       break;
               case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
                          (&u0, sz, i0);
                       break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       /* fallthru */
               case 1: putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }
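   /* Editor's note -- a worked example of the (de)interleaving above,
      not from the original source.  For LD2 {v0.4s, v1.4s}, [x0], with
      memory at x0 holding the 32-bit items s0 s1 s2 s3 s4 s5 s6 s7, the
      two 128-bit loads give i0 = s3:s2:s1:s0 and i1 = s7:s6:s5:s4, and
      math_DEINTERLEAVE2_128 then produces
         v0 = s6:s4:s2:s0   (element 0 of each pair)
         v1 = s7:s5:s3:s1   (element 1 of each pair)
      ST2 applies the inverse transform (math_INTERLEAVE2_128) before
      storing. */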
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs  ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs  ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs  ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt opc   = INSN(15,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      Bool isQ   = bitQ == 1;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,1,0): nRegs = 4; break;
         case BITS4(0,1,1,0): nRegs = 3; break;
         case BITS4(1,0,1,0): nRegs = 2; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs >= 2 && nRegs <= 4) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3;
         u0 = u1 = u2 = u3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128();
                    u0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32));
                       assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u3)) );
                        /* fallthru */
               case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u2)) );
                        /* fallthru */
               case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u1)) );
                        storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u0)) );
                        break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(u3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(u2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(u1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  assign(u0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st",
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }
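   /* Editor's note, not from the original source: unlike the LD2/LD3/LD4
      cases above, these multi-register LD1/ST1 forms move each register's
      bytes to/from memory unchanged -- there is no lane (de)interleaving
      -- which is why the transfer loops above contain no
      math_INTERLEAVE/math_DEINTERLEAVE calls. */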
   /* ---------- LD1R (single structure, replicate) ---------- */
   /* ---------- LD2R (single structure, replicate) ---------- */
   /* ---------- LD3R (single structure, replicate) ---------- */
   /* ---------- LD4R (single structure, replicate) ---------- */
   /* 31 29       22 20    15    11 9 4
      0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
      0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step

      0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
      0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step

      0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
      0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step

      0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
      0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
       && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
       && INSN(12,12) == 0) {
      UInt   bitQ  = INSN(30,30);
      Bool   isPX  = INSN(23,23) == 1;
      UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt   mm    = INSN(20,16);
      UInt   sz    = INSN(11,10);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (isPX || mm == 0) {

         IRType ty = integerIRTypeOfSize(1 << sz);

         UInt laneSzB = 1 << sz;
         UInt xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
         e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4:
               e3 = newTemp(ty);
               assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(3 * laneSzB))));
               v3 = math_DUP_TO_V128(e3, ty);
               putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
               /* fallthrough */
            case 3:
               e2 = newTemp(ty);
               assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(2 * laneSzB))));
               v2 = math_DUP_TO_V128(e2, ty);
               putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
               /* fallthrough */
            case 2:
               e1 = newTemp(ty);
               assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(1 * laneSzB))));
               v1 = math_DUP_TO_V128(e1, ty);
               putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
               /* fallthrough */
            case 1:
               e0 = newTemp(ty);
               assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(0 * laneSzB))));
               v0 = math_DUP_TO_V128(e0, ty);
               putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
               break;
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
             nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }
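   /* Editor's note -- worked example, not from the original source.
      For LD1R {v0.4s}, [x0]: one 32-bit element e is loaded from [x0],
      math_DUP_TO_V128 broadcasts it to every lane (v0 = e:e:e:e), and
      with Q=1 math_MAYBE_ZERO_HI64 leaves the upper half as generated.
      For the D-register form LD1R {v0.2s}, [x0], Q=0 and the upper 64
      bits of v0 are zeroed instead. */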
   /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
   /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
   /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
   /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
   /* 31 29       22 21 20    15    11 9 4
      0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step

      0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
      op   = case L of 1 -> LD ; 0 -> ST

      laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
                                     01:b:b:b0 -> 2, bbb
                                     10:b:b:00 -> 4, bb
                                     10:b:0:01 -> 8, b
                                     _         -> invalid
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
      UInt   bitQ  = INSN(30,30);
      Bool   isPX  = INSN(23,23) == 1;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt   mm    = INSN(20,16);
      UInt   xx    = INSN(15,14);
      UInt   bitS  = INSN(12,12);
      UInt   sz    = INSN(11,10);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);

      Bool valid = True;

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (!isPX && mm != 0)
         valid = False;

      UInt laneSzB = 0;  /* invalid */
      UInt ix      = 16; /* invalid */

      UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
      switch (xx_q_S_sz) {
         case 0x00: case 0x01: case 0x02: case 0x03:
         case 0x04: case 0x05: case 0x06: case 0x07:
         case 0x08: case 0x09: case 0x0A: case 0x0B:
         case 0x0C: case 0x0D: case 0x0E: case 0x0F:
            laneSzB = 1; ix = xx_q_S_sz & 0xF;
            break;
         case 0x10: case 0x12: case 0x14: case 0x16:
         case 0x18: case 0x1A: case 0x1C: case 0x1E:
            laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
            break;
         case 0x20: case 0x24: case 0x28: case 0x2C:
            laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
            break;
         case 0x21: case 0x29:
            laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
            break;
         default:
            break;
      }

      if (valid && laneSzB != 0) {

         IRType ty      = integerIRTypeOfSize(laneSzB);
         UInt   xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         switch (nRegs) {
            case 4: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
               }
            }
            /* fallthrough */
            case 3: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
               }
            }
            /* fallthrough */
            case 2: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
               }
            }
            /* fallthrough */
            case 1: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
               }
               break;
            }
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
             ix, nameIReg64orSP(nn), pxStr);

         return True;
      }
      /* else fall through */
   }
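   /* Editor's note -- worked example, not from the original source.
      For LD3 {v4.b, v5.b, v6.b}[7], [x2]: laneSzB = 1 and nRegs = 3, so
      three consecutive bytes at x2, x2+1, x2+2 are loaded into lane 7 of
      v4, v5 and v6 respectively; all other lanes of those registers are
      left unchanged, hence putQRegLane rather than putQReg128 above. */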
   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   /* For the "standard" implementation we pass through the LL and SC to
      the host.  For the "fallback" implementation, for details see
        https://bugs.kde.org/show_bug.cgi?id=344524 and
        https://bugs.kde.org/show_bug.cgi?id=369459,
      but in short:

      LoadLinked(addr)
        gs.LLsize = load_size  // 1, 2, 4 or 8
        gs.LLaddr = addr
        gs.LLdata = zeroExtend(*addr)

      StoreCond(addr, data)
        tmp_LLsize = gs.LLsize
        gs.LLsize = 0 // "no transaction"
        if tmp_LLsize != store_size        -> fail
        if addr != gs.LLaddr               -> fail
        if zeroExtend(*addr) != gs.LLdata  -> fail
        cas_ok = CAS(store_size, addr, gs.LLdata -> data)
        if !cas_ok                         -> fail
        succeed

      When thread scheduled
        gs.LLsize = 0 // "no transaction"
        (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
         has to do this bit)
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         if (abiinfo->guest__use_fallback_LLSC) {
            // Do the load first so we don't update any guest state
            // if it faults.
            IRTemp loaded_data64 = newTemp(Ity_I64);
            assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
            stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
            stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
            stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
            putIReg64orZR(tt, mkexpr(loaded_data64));
         } else {
            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
            putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         }
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
             abiinfo->guest__use_fallback_LLSC
                ? "(fallback implementation)" : "");
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         if (abiinfo->guest__use_fallback_LLSC) {
            // This is really ugly, since we don't have any way to do
            // proper if-then-else.  First, set up as if the SC failed,
            // and jump forwards if it really has failed.

            // Continuation address
            IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);

            // "the SC failed".  Any non-zero value means failure.
            putIReg64orZR(ss, mkU64(1));

            IRTemp tmp_LLsize = newTemp(Ity_I64);
            assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
            stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
            ));

            // Fail if no or wrong-size transaction
            vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
            stmt( IRStmt_Exit(
                     binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
                     Ijk_Boring, nia, OFFB_PC
            ));

            // Fail if the address doesn't match the LL address
            stmt( IRStmt_Exit(
                     binop(Iop_CmpNE64, mkexpr(ea),
                                        IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
                     Ijk_Boring, nia, OFFB_PC
            ));

            // Fail if the data doesn't match the LL data
            IRTemp llsc_data64 = newTemp(Ity_I64);
            assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
            stmt( IRStmt_Exit(
                     binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
                                        mkexpr(llsc_data64)),
                     Ijk_Boring, nia, OFFB_PC
            ));

            // Try to CAS the new value in.
            IRTemp old  = newTemp(ty);
            IRTemp expd = newTemp(ty);
            assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
            stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
                                     Iend_LE, mkexpr(ea),
                                     /*expdHi*/NULL, mkexpr(expd),
                                     /*dataHi*/NULL, data
            )));

            // Fail if the CAS failed (viz, old != expd)
            stmt( IRStmt_Exit(
                     binop(Iop_CmpNE64,
                           widenUto64(ty, mkexpr(old)),
                           widenUto64(ty, mkexpr(expd))),
                     Ijk_Boring, nia, OFFB_PC
            ));

            // Otherwise we succeeded (!)
            putIReg64orZR(ss, mkU64(0));
         } else {
            IRTemp res = newTemp(Ity_I1);
            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
            /* IR semantics: res is 1 if store succeeds, 0 if it fails.
               Need to set rS to 1 on failure, 0 on success. */
            putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                               mkU64(1)));
         }
         DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
             abiinfo->guest__use_fallback_LLSC
                ? "(fallback implementation)" : "");
         return True;
      }
      /* else fall through */
   }
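   /* Editor's note, not from the original source: the fallback SC above
      is weaker than a real STXR.  Because success is decided by comparing
      the re-loaded value (and then a CAS) against gs.LLdata, an ABA-style
      change to the monitored location between the LL and the SC goes
      undetected, whereas genuine exclusives would fail in that case.  The
      two bugzilla entries cited above discuss why this trade-off was
      considered acceptable. */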
   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }
   /* The PRFM cases that follow may allow Rt values (the prefetch
      operation) which are not allowed by the documentation.  This
      should be looked into. */
   /* ------------------ PRFM (immediate) ------------------ */
   /* 31           21    9 4
      11 111 00110 imm12 n t   PRFM pfrop=Rt, [Xn|SP, #pimm]
   */
   if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
      UInt imm12 = INSN(21,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      /* Generating any IR here is pointless, except for documentation
         purposes, as it will get optimised away later. */
      IRTemp ea = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
      DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
      return True;
   }

   /* ------------------ PRFM (register) ------------------ */
   /* 31 29      22 20 15  12 11 9  4
      11 1110001 01 Rm opt S  10 Rn Rt    PRFM pfrop=Rt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
       && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   tt = INSN(4,0);
      IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         /* No actual code to generate. */
         DIP("prfm prfop=%u, %s\n", tt, dis_buf);
         return True;
      }
   }

   /* ------------------ PRFM (unscaled offset) ------------------ */
   /* 31 29      22 20   11 9  4
      11 1110001 00 imm9 00 Rn Rt    PRFM pfrop=Rt, [Xn|SP, #simm]
   */
   if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
       && INSN(11,10) == BITS2(0,0)) {
      ULong  imm9   = INSN(20,12);
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      ULong  offset = sx_to_64(imm9, 9);
      IRTemp ea     = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
      /* No actual code to generate. */
      DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
      return True;
   }
   /* ---------------- ARMv8.1-LSE: Atomic Memory Operations ---------------- */
   /* 31 29     23 22 21 20 15   11 9 4
      sz 111000 A  R  1  s  0000 00 n t LDADD{,A}{,L}<sz>  <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  0001 00 n t LDCLR{,A}{,L}<sz>  <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  0010 00 n t LDEOR{,A}{,L}<sz>  <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  0011 00 n t LDSET{,A}{,L}<sz>  <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  0100 00 n t LDSMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  0101 00 n t LDSMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  0110 00 n t LDUMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  0111 00 n t LDUMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
      sz 111000 A  R  1  s  1000 00 n t SWP{,A}{,L}<sz>    <Rs>, <Rt>, [<Xn|SP>]
   */
   if (INSN(29,24) == BITS6(1,1,1,0,0,0)
       && INSN(21,21) == 1
       && (INSN(15,12) <= BITS4(1,0,0,0))
       && INSN(11,10) == BITS2(0,0)) {
      UInt szBlg2 = INSN(31,30);
      Bool isAcq  = INSN(23,23) == 1;
      Bool isRel  = INSN(22,22) == 1;
      UInt ss     = INSN(20,16);
      UInt opc    = INSN(15,12);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      const HChar* nm = NULL;
      const HChar* suffix[4] = { "b", "h", "", "" };

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 bytes*/
      IRType ty  = integerIRTypeOfSize(szB);
      Bool is64  = szB == 8;
      Bool isSigned = (opc == 4) || (opc == 5) /*smax || smin*/;

      // IR used to emulate these atomic memory ops:
      // 1) barrier
      // 2) load
      // 3) widen operands and do arithmetic/logic op
      // 4) cas to see if target memory updated
      // 5) barrier
      // 6) repeat from 1) if cas says target memory not updated
      // 7) update register

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));

      // Insert barrier before loading for acquire and acquire-release variants:
      // A and AL.
      if (isAcq && (tt != 31))
         stmt(IRStmt_MBE(Imbe_Fence));

      // Load LHS from memory, RHS from register.
      IRTemp orig = newTemp(ty);
      assign(orig, loadLE(ty, mkexpr(ea)));
      IRExpr *lhs = mkexpr(orig);
      IRExpr *rhs = narrowFrom64(ty, getIReg64orZR(ss));
      IRExpr *res = NULL;

      lhs = isSigned ? widenSto64(ty, lhs) : widenUto64(ty, lhs);
      rhs = isSigned ? widenSto64(ty, rhs) : widenUto64(ty, rhs);

      // Perform the operation.
      switch (opc) {
         case 0:
            nm = "ldadd";
            res = binop(Iop_Add64, lhs, rhs);
            break;
         case 1:
            nm = "ldclr";
            res = binop(Iop_And64, lhs, unop(mkNOT(Ity_I64), rhs));
            break;
         case 2:
            nm = "ldeor";
            res = binop(Iop_Xor64, lhs, rhs);
            break;
         case 3:
            nm = "ldset";
            res = binop(Iop_Or64, lhs, rhs);
            break;
         case 4:
            nm = "ldsmax";
            res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), rhs, lhs);
            break;
         case 5:
            nm = "ldsmin";
            res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), lhs, rhs);
            break;
         case 6:
            nm = "ldumax";
            res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), rhs, lhs);
            break;
         case 7:
            nm = "ldumin";
            res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), lhs, rhs);
            break;
         case 8:
            nm = "swp";
            res = rhs;
            break;
         default:
            vassert(0);
      }

      // Store the result back if LHS remains unchanged in memory.
      IRTemp old = newTemp(ty);
      stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
                               Iend_LE, mkexpr(ea),
                               /*expdHi*/NULL, mkexpr(orig),
                               /*dataHi*/NULL, narrowFrom64(ty, res))) );

      // Insert barrier after storing for release and acquire-release variants:
      // L and AL.
      if (isRel)
         stmt(IRStmt_MBE(Imbe_Fence));

      // Retry if the CAS failed (i.e. when old != orig).
      IRConst* nia = IRConst_U64(guest_PC_curr_instr);
      stmt( IRStmt_Exit(
                binop(Iop_CasCmpNE64,
                      widenUto64(ty, mkexpr(old)),
                      widenUto64(ty, mkexpr(orig))),
                Ijk_Boring, nia, OFFB_PC ));
      // Otherwise we succeeded.
      putIReg64orZR(tt, widenUto64(ty, mkexpr(old)));

      DIP("%s%s%s%s %s, %s, [%s]\n", nm, isAcq ? "a" : "", isRel ? "l" : "",
          suffix[szBlg2], nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt),
          nameIReg64orSP(nn));
      return True;
   }
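   /* Editor's note -- a sketch of the expansion above, not from the
      original source.  For LDADD w1, w2, [x0] the emitted IR behaves
      like:

         retry:
            orig = *(UInt*)x0;
            old  = CAS32(x0, expected=orig, data=orig + w1);
            if (old != orig) goto retry;  // IRStmt_Exit back to this insn
            w2 = old;

      i.e. a CAS loop that is re-entered by re-executing the whole
      instruction, since IR blocks cannot contain internal loops. */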
   /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */
   /* 31 29      22 21 20 15 14    9 4
      sz 0010001 A  1  s  R  11111 n t CAS{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,21) == 1
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isAcq  = INSN(22,22) == 1;
      Bool isRel  = INSN(15,15) == 1;
      UInt ss     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      const HChar* suffix[4] = { "b", "h", "", "" };

      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      Bool is64  = szB == 8;

      IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
      IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));

      if (isAcq)
         stmt(IRStmt_MBE(Imbe_Fence));

      // Store the result back if LHS remains unchanged in memory.
      IRTemp old = newTemp(ty);
      stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
                               Iend_LE, getIReg64orSP(nn),
                               /*expdHi*/NULL, exp,
                               /*dataHi*/NULL, new)) );

      if (isRel)
         stmt(IRStmt_MBE(Imbe_Fence));

      putIReg64orZR(ss, widenUto64(ty, mkexpr(old)));
      DIP("cas%s%s%s %s, %s, [%s]\n",
          isAcq ? "a" : "", isRel ? "l" : "", suffix[szBlg2],
          nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
      return True;
   }
   /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
   /* 31 30 29      22 21 20 15 14    9 4
      0  sz 0010000 A  1  s  R  11111 n t CASP{,A}{,L} <Rs>, <Rt>, [<Xn|SP>]
   */
   if (INSN(31,31) == 0
       && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && INSN(21,21) == 1
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt is64  = INSN(30,30);
      Bool isAcq = INSN(22,22) == 1;
      Bool isRel = INSN(15,15) == 1;
      UInt ss    = INSN(20,16);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      if ((ss & 0x1) || (tt & 0x1)) {
         /* undefined; fall through */
      } else {
         IRExpr *expLo = getIRegOrZR(is64, ss);
         IRExpr *expHi = getIRegOrZR(is64, ss + 1);
         IRExpr *newLo = getIRegOrZR(is64, tt);
         IRExpr *newHi = getIRegOrZR(is64, tt + 1);
         IRTemp oldLo  = newTemp(is64 ? Ity_I64 : Ity_I32);
         IRTemp oldHi  = newTemp(is64 ? Ity_I64 : Ity_I32);

         if (isAcq)
            stmt(IRStmt_MBE(Imbe_Fence));

         stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
                                  Iend_LE, getIReg64orSP(nn),
                                  expHi, expLo,
                                  newHi, newLo)) );

         if (isRel)
            stmt(IRStmt_MBE(Imbe_Fence));

         putIRegOrZR(is64, ss, mkexpr(oldLo));
         putIRegOrZR(is64, ss+1, mkexpr(oldHi));
         DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
             isAcq ? "a" : "", isRel ? "l" : "",
             nameIRegOrZR(is64, ss), nameIRegOrZR(is64, ss+1),
             nameIRegOrZR(is64, tt), nameIRegOrZR(is64, tt+1),
             nameIReg64orSP(nn));
         return True;
      }
   }
   if (sigill_diag) {
      vex_printf("ARM64 front end: load_store\n");
   }
   return False;
#  undef INSN
}
/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexArchInfo* archinfo,
                          const VexAbiInfo* abiinfo, Bool sigill_diag)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }
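   /* Editor's note, not from the original source: decoded this way, a
      conditional branch such as "b.ne 0x400123" becomes a two-exit block
      ending --

         if (cond) { PC = 0x400123; exit; }   // IRStmt_Exit side exit
         PC = <insn addr> + 4; exit;          // fallthrough, Dis_StopHere

      -- so the taken edge is a side exit and the not-taken edge is the
      block's final jump. */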
   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }
   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }
   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }
   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      UInt  bitNo  = (b5 << 5) | b40;
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }
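   /* Editor's note -- worked example, not from the original source.
      "tbz x3, #33, target" has b5 = 1 and b40 = 00001, so
      bitNo = (1 << 5) | 1 = 33; the IR computes (x3 >> 33) & 1 and takes
      the side exit to 'target' when that value compares equal to zero. */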
   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
      DIP("svc #0\n");
      return True;
   }
   /* ------------------ M{SR,RS} ------------------ */
   /* ---- Cases for TPIDR_EL0 ----
      0xD51BD0 010 Rt   MSR tpidr_el0, rT
      0xD53BD0 010 Rt   MRS rT, tpidr_el0
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPCR ----
      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPSR ----
      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
      The only part of this we model is FPSR.QC.  All other bits
      are ignored when writing to it and RAZ when reading from it.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Just deal with FPSR.QC.  Make up a V128 value which is
            zero if Xt[27] is zero and any other value if Xt[27] is
            nonzero. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_And64,
                            binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
                            mkU64(1)));
         IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
         stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         /* Generate a value which is all zeroes except for bit 27,
            which must be zero if QCFLAG is all zeroes and one otherwise. */
         IRTemp qcV128 = newTempV128();
         assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
                                      unop(Iop_V128to64,   mkexpr(qcV128))));
         IRExpr* res = binop(Iop_Shl64,
                             unop(Iop_1Uto64,
                                  binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
                             mkU8(27));
         putIReg64orZR(tt, res);
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for NZCV ----
      D51B42 000 Rt  MSR nzcv, rT
      D53B42 000 Rt  MRS rT, nzcv
      The only parts of NZCV that actually exist are bits 31:28, which
      are the N Z C and V bits themselves.  Hence the flags thunk provides
      all the state we need.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
         DIP("msr nzcv, %s\n", nameIReg32orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for DCZID_EL0 ----
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt  MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CTR_EL0 ----
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CNTVCT_EL0 ----
      This is a timestamp counter of some sort.  Support reads of it only
      by passing through to the host.
      D5 3B E0 010 Rt  MRS Xt, cntvct_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
                         &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CNTFRQ_EL0 ----
      This is always RO at EL0, so it's safe to pass through to the host.
      D5 3B E0 000 Rt  MRS Xt, cntfrq_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
                         &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }
   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
      D5 0B 7E 001 Rt  dc civac, rT
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7E20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }
   /* ------------------ ISB, DMB, DSB ------------------ */
   /* 31          21            11  7 6  4
      11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
      11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
      11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
   */
   if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
       && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
       && INSN(7,7) == 1
       && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt opc = INSN(6,5);
      UInt CRm = INSN(11,8);
      vassert(opc <= 2 && CRm <= 15);
      stmt(IRStmt_MBE(Imbe_Fence));
      const HChar* opNames[3]
         = { "dsb", "dmb", "isb" };
      const HChar* howNames[16]
         = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
             "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
      DIP("%s %s\n", opNames[opc], howNames[CRm]);
      return True;
   }
   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }
7400 /* -------------------- BRK -------------------- */
7402 1101 0100 001 imm16 00000 BRK #imm16
7404 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
7405 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
7406 UInt imm16
= INSN(20,5);
7407 /* Request SIGTRAP and then restart of this insn. */
7408 putPC(mkU64(guest_PC_curr_instr
+ 0));
7409 dres
->whatNext
= Dis_StopHere
;
7410 dres
->jk_StopHere
= Ijk_SigTRAP
;
7411 DIP("brk #%u\n", imm16
);
7415 /* ------------------- YIELD ------------------- */
7417 1101 0101 0000 0011 0010 0000 0011 1111
7419 if (INSN(31,0) == 0xD503203F) {
7420 /* Request yield followed by continuation at the next insn. */
7421 putPC(mkU64(guest_PC_curr_instr
+ 4));
7422 dres
->whatNext
= Dis_StopHere
;
7423 dres
->jk_StopHere
= Ijk_Yield
;
7428 /* -------------------- HINT ------------------- */
7430 1101 0101 0000 0011 0010 imm7 1 1111
7431 Catch otherwise unhandled HINT instructions - any
7432 like YIELD which are explicitly handled should go
7435 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
7436 && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
7437 && INSN(15,12) == BITS4(0,0,1,0)
7438 && INSN(4,0) == BITS5(1,1,1,1,1)) {
7439 UInt imm7
= INSN(11,5);
7440 DIP("hint #%u\n", imm7
);
7444 /* ------------------- CLREX ------------------ */
7446 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
7447 CRm is apparently ignored.
7449 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
7450 UInt mm
= INSN(11,8);
7451 /* AFAICS, this simply cancels a (all?) reservations made by a
7452 (any?) preceding LDREX(es). Arrange to hand it through to
7454 if (abiinfo
->guest__use_fallback_LLSC
) {
7455 stmt( IRStmt_Put( OFFB_LLSC_SIZE
, mkU64(0) )); // "no transaction"
7457 stmt( IRStmt_MBE(Imbe_CancelReservation
) );
7459 DIP("clrex #%u\n", mm
);
   if (sigill_diag) {
      vex_printf("ARM64 front end: branch_etc\n");
   }
   return False;
#  undef INSN
}
/*------------------------------------------------------------*/
/*--- SIMD and FP instructions:  helper functions           ---*/
/*------------------------------------------------------------*/

/* Some constructors for interleave/deinterleave expressions. */

static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}
/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}
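/* Editor's note -- worked examples, not from the original source:
      Replicate(1, 5)     == 0b11111 == 0x1F
      Replicate32x2(0xAB) == 0x000000AB000000AB
      Replicate8x8(0xAB)  == 0xABABABABABABABAB                       */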
/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
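/* Editor's note -- worked example, not from the original source.
   For FMOV d0, #1.0 the encoding is imm8 = 0x70, so imm8_6 = 1 and,
   with N = 64: E = 9, F = 54,
      sign = 0
      exp  = (0 << 8) | Replicate(1, 8) = 0xFF
      frac = 0x30 << 48
   giving res = (0xFF << 54) | (0x30 << 48) = 0x3FF0000000000000, which
   is IEEE754 double 1.0: the top two architectural fraction bits fold
   into what IEEE calls the exponent field, which is why the narrower
   E here still produces the right bit pattern. */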
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}
/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Max32Fx4: case Iop_Min32Fx4:
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Max64Fx2: case Iop_Min64Fx2:
      case Iop_Add64x2: {
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         return IRTemp_INVALID;
   }
}
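/* To illustrate the folding scheme in the 4x32 case: starting from
   lanes [x3 x2 x1 x0], the CatOdd/CatEven applications produce
   [x3 x2 x3 x2] and [x1 x0 x1 x0], then [x3 x3 x3 x3] .. [x0 x0 x0 x0],
   so the binop tree computes op(op(x3,x2), op(x1,x0)) in every lane,
   and the final ZeroHI unop keeps just lane 0.  The 8- and 16-lane
   cases are the same idea with more halving steps. */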
/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only. */
static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                             IRTemp oor_values )
{
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running result. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                   mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}
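/* A concrete trace: with len == 1 (a two-register table) and a source
   lane holding 17, the tabent == 0 pass biases by 0, and 17 fails the
   16 > index validity test, contributing zero; the tabent == 1 pass
   biases by 16, giving index 1, which is valid, so Perm8x16 picks
   byte 1 of tab[1].  A lane holding, say, 40 fails both passes and is
   filled from |oor_values| by the final overall_valid_mask step. */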
/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}
/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}
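/* In other words, per lane this computes
      res = argL > argR  ? argL - argR  : argR - argL
   by AND/OR selection under |msk|, with the comparison signed or
   unsigned according to |isU|.  The subtraction yields the same bit
   pattern either way, so only the mask depends on signedness. */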
/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10:
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src), mkexpr(src)),
                      mkU8(32)));
         break;
      case X01:
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src), mkexpr(src)),
                      mkU8(16)));
         break;
      case X00:
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src), mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}
/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}
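/* For instance, with sizeNarrow == X00 (8-to-16-bit widening): odd
   lanes already sit in the top halves of the 16-bit groups, so a
   single right shift by 8 widens them; even lanes are first moved to
   the top halves by the left shift and then shifted back down, which
   performs the sign- or zero-extension. */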
/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}
/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo]. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}
/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
        otherwise invalid
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}
/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}
/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Get the old contents of Vdd, zero the upper half, and replace
         it with |new64|. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                 mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                        mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}
/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}
/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src,   srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}
/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}
/* Generate IR to compute vector widening MULL from either the lower
   (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   widening multiplies are unsigned when isU==True and signed when
   isU==False.  |size| is the narrow lane size indication.  Optionally,
   the product may be added to or subtracted from vecD, at the wide lane
   size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   is 'm' (only multiply) then the accumulate part does not happen, and
   |vecD| is expected to == IRTemp_INVALID.

   Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   are allowed.  The result is returned in a new IRTemp, which is
   returned in *res. */
static
void math_MULL_ACC ( /*OUT*/IRTemp* res,
                     Bool is2, Bool isU, UInt size, HChar mas,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(res && *res == IRTemp_INVALID);
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
                  : (mas == 's' ? mkVecSUB(size+1)
                                : Iop_INVALID);
   IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
                                            mkexpr(vecN), mkexpr(vecM));
   *res = newTempV128();
   assign(*res, mas == 'm' ? mkexpr(mul)
                           : binop(accOp, mkexpr(vecD), mkexpr(mul)));
}
/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.d[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}
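/* Note that |sat1n| doubles the plain widening product by adding it
   to itself (tn + tn), at the wide lane size and without saturation;
   comparing it against |sat1q| is what later drives the QCFLAG
   update. */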
/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}
/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   /* addWide is an operator, so it must be an IROp, not an IRTemp. */
   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int    rcShift    = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}
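/* Numeric check for the rounding variant with 16-bit lanes: for
   inputs 0x4000 and 0x0002 the doubled 32-bit product is 0x10000;
   adding the rounding constant 0x8000 gives 0x18000, whose high half
   (extracted by mkVecCATODDLANES) is 1, as expected for
   round-to-nearest. */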
/* Generate IR for SQRDMLAH and SQRDMLSH: signedly wideningly multiply,
   double, add a rounding constant, take the high half and accumulate. */
static
void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd,
                     UInt size, IRTemp vD, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */

   IRTemp mul, mul_nosat, dummy;
   mul = mul_nosat = dummy = IRTemp_INVALID;
   math_SQDMULH(&mul, &dummy, &mul_nosat, True/*R*/, size, vN, vM);

   IROp op  = isAdd ? mkVecADD(size)   : mkVecSUB(size);
   IROp qop = isAdd ? mkVecQADDS(size) : mkVecQSUBS(size);
   newTempsV128_2(res, res_nosat);
   assign(*res, binop(qop, mkexpr(vD), mkexpr(mul)));
   assign(*res_nosat, binop(op, mkexpr(vD), mkexpr(mul_nosat)));
}
/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATUU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATSS(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift >= 0 && rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                     mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATSU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src),
                               mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}
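/* As a sanity check of the "uqshl" case: with 8-bit lanes and
   shift == 3, rshift == 5, so qDiff1 holds src >> 5 -- precisely the
   three bits that the left shift discards.  Any nonzero lane of
   qDiff1 thus marks a lane whose saturated and unsaturated results
   differ, which is what the QCFLAG machinery below looks for. */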
/* Generate IR to do SRHADD and URHADD. */
static
IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
{
   /* Generate this:
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   */
   vassert(size <= 3);
   IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   IROp opADD = mkVecADD(size);
   /* The only tricky bit is to generate the correct vector 1 constant. */
   const ULong ones64[4]
      = { 0x0101010101010101ULL, 0x0001000100010001ULL,
          0x0000000100000001ULL, 0x0000000000000001ULL };
   IRTemp imm64 = newTemp(Ity_I64);
   assign(imm64, mkU64(ones64[size]));
   IRTemp vecOne = newTempV128();
   assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   IRTemp scaOne = newTemp(Ity_I8);
   assign(scaOne, mkU8(1));
   IRTemp res = newTempV128();
   assign(res,
          binop(opADD,
                binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
                binop(opADD,
                      binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
                      binop(opSHR,
                            binop(opADD,
                                  binop(opADD,
                                        binop(Iop_AndV128, mkexpr(aa),
                                              mkexpr(vecOne)),
                                        binop(Iop_AndV128, mkexpr(bb),
                                              mkexpr(vecOne))),
                                  mkexpr(vecOne)),
                            mkexpr(scaOne)))));
   return res;
}
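/* Worked example in one lane: for A == 5, B == 6,
   (5 >> 1) + (6 >> 1) + (((5 & 1) + (6 & 1) + 1) >> 1)
   == 2 + 3 + 1 == 6 == (5 + 6 + 1) >> 1,
   the rounded halving add, computed without overflowing the lane. */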
/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used
   to generate QCFLAG update code for both scalar and vector SIMD
   operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128,
                                     mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}
/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for
   whole-vector operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}
/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0]  -->  [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2,
                            mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2,
                            mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4,
                            mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4,
                            mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}
/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}


/* Returns 2.0 ^ n for n in 1 .. 64 */
static Double two_to_the_plus ( Int n )
{
   if (n == 1) return 2.0;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_plus(half) * two_to_the_plus(n - half);
}
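/* Both helpers compute exact powers of two by recursively halving the
   exponent: e.g. two_to_the_plus(5) == two_to_the_plus(2) *
   two_to_the_plus(3) == 4.0 * 8.0 == 32.0.  Every intermediate value
   is exactly representable as a Double, so no libm support or rounding
   is involved. */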
/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 15);
            assign(res, triop(Iop_SliceV128,
                              mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 7);
            IRTemp hi64lo64 = newTempV128();
            assign(hi64lo64, binop(Iop_InterleaveLO64x2,
                                   mkexpr(sHi), mkexpr(sLo)));
            assign(res, triop(Iop_SliceV128,
                              mkexpr(hi64lo64), mkexpr(hi64lo64),
                              mkU8(imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ ==1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23   21 20 15 14     11 9 4
      0 q 001110 size 0  m  0  opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUZP1 = opcode == BITS3(0,0,1);
      IROp op     = isUZP1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                            getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isTRN1 = opcode == BITS3(0,1,0);
      IROp op1    = isTRN1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IROp op2    = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isZIP1 = opcode == BITS3(0,1,1);
      IROp op     = isZIP1 ? mkVecINTERLEAVELO(size)
                           : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         IRTemp z128 = newTempV128();
         assign(z128, mkV128(0x0000));
         // preL = Vm shifted left 32 bits
         // preR = Vn shifted left 32 bits
         assign(preL, triop(Iop_SliceV128,
                            getQReg128(mm), mkexpr(z128), mkU8(12)));
         assign(preR, triop(Iop_SliceV128,
                            getQReg128(nn), mkexpr(z128), mkU8(12)));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool   isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size,
                              mkexpr(src))));
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size];
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16,  Iop_Add16x8,  Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   if ((size == X00 || size == X10)
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
      /* -------- 0,10,01100: FMINNMV s_4s -------- */
      /* -------- 1,00,01111: FMAXV   s_4s -------- */
      /* -------- 1,10,01111: FMINV   s_4s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      if (bitQ == 0) return False; // Only 4s is allowed
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
      IRTemp src   = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp res = math_FOLDV(src, opMXX);
      putQReg128(dd, mkexpr(res));
      DIP("%s%sv s%u, %u.4s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15      9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt    laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res    = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20     14   9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx,  imm4[3:1]
                        xx100 -> S, xx,   imm4[3:2]
                        x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar  ts  = '?';
      IRType ity = Ity_INVALID;
      UInt   ix1 = 16;
      UInt   ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
      case BITS5(0,1,1,1,1): // 0:1111
         ok = True; isFMOV = True; break;

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo),
                    mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd),
                    immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi,
             imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
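      /* Note: imm5 encodes both the lane size and the lane number: the
         position of the lowest set bit selects the size, and the bits
         above it give the index.  E.g. imm5 = 0b00111 selects a B lane
         (bit 0 set) with laneNo = imm5[4:1] = 3. */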
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
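      /* xx now has the upper D lane of nn duplicated into both halves, so
         lane 0 of the Add64x2 below computes nn[63:0] + nn[127:64], which
         is exactly the pairwise sum. */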
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }
   if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
      /* -------- 1,00,01101 ADDP s_2s -------- */
      /* -------- 1,01,01101 ADDP d_2d -------- */
      Bool   isD   = sz == X01;
      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp   opADD = mkVecADDF(isD ? 3 : 2);
      IRTemp src   = newTempV128();
      IRTemp argL  = newTempV128();
      IRTemp argR  = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
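      /* Iop_SliceV128(hi, lo, n) selects bytes [n+15 : n] of hi:lo, so
         slicing src:src by 8 (or 4) bytes in effect rotates the second
         element of the pair down to lane 0; argL and argR thus hold the
         two pair elements, each isolated in the lowest lane. */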
      putQReg128(dd, unop(opZHI,
                          triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
                                       mkexpr(argL), mkexpr(argR))));
      DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
      /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
      /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
      /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool   isD   = (sz & 1) == 1;
      Bool   isMIN = (sz & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp src   = newTempV128();
      IRTemp argL  = newTempV128();
      IRTemp argR  = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          binop(opMXX, mkexpr(argL), mkexpr(argR))));
      HChar c = isD ? 'd' : 's';
      DIP("%s%sp %c%u, v%u.2%c\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
      /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
      /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
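      /* For these D-sized scalar forms immh is 1xxx, so sh = 128 - immhb
         lies in 1..64: e.g. immh:immb = 1111:111 encodes sh = 1, and
         1000:000 encodes sh = 64.  The IR shift ops only accept amounts
         0..63, hence the special-casing below: an unsigned shift by 64
         yields zero, while a signed shift by 64 gives the same result as
         a shift by 63. */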
      if (sh == 64 && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (sh == 64) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(sh - nudge)));
      }
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }
   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
      /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
      /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op   = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-sh)));
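      /* The Iop_Rsh* ops take a per-lane signed shift amount, where a
         negative value means a rounding shift right.  So duplicating
         (UChar)(-sh) across the vector below requests a rounding right
         shift by sh in every lane. */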
      IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }
   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      if (sh == 64) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
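         /* The arithmetic shift replicates the sign bit, so nmask has
            exactly the top sh bits set -- e.g. sh = 8 gives
            0xFF00000000000000.  Those bits of dd are retained below; the
            remaining bits come from nn >> sh. */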
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp res = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
   if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      putQReg128(dd,
                 unop(Iop_ZeroHI64ofV128,
                      sh == 0 ? getQReg128(nn)
                              : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      if (sh == 0) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong nmask = (1ULL << sh) - 1;
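         /* nmask has the low sh bits set -- e.g. sh = 3 gives 7 -- so
            those bits of dd survive and the rest come from nn << sh,
            which is the defined SLI behaviour. */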
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp res = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  #imm -------- */
      /* -------- 1,01110  UQSHL  #imm -------- */
      /* -------- 1,01100  SQSHLU #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
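      /* Worked example: immh:immb = 0101:010 (immhb = 42) selects S
         lanes; getLaneInfo_IMMH_IMMB returns shift = 64 - 42 = 22, and
         the adjustment gives 32 - 22 = 10, ie a left shift of
         immhb - 32 = 10, as the architecture specifies. */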
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
      /* This relies on the fact that the zeroed out lanes generate zeroed
         result lanes and don't saturate, so there's no point in trimming
         the resulting res, qDiff1 or qDiff2 values. */
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, mkexpr(res));
      updateQCFLAGwithDifference(qDiff1, qDiff2);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
      return True;
   }
   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN #imm -------- */
      /* -------- 1,10010   UQSHRN #imm -------- */
      /* -------- 0,10011  SQRSHRN #imm -------- */
      /* -------- 1,10011  UQRSHRN #imm -------- */
      /* -------- 1,10000  SQSHRUN #imm -------- */
      /* -------- 1,10001 SQRSHRUN #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(size >= X00 && size <= X10);
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
      IRTemp pair   = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
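      /* The QANDq...NARROW ops return a pair packed into one V128: the
         64-bit narrowed result in the lower half and a "saturation
         happened" indication in the upper half.  The lower half goes to
         the destination; the upper half is extracted below and compared
         against zero to drive the QC flag. */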
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putQReg128(dd, mkexpr(res64in128));
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);

      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
      return True;
   }
   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
      /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double scale = two_to_the_minus(fbits);
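      /* Fixed-point conversion is done by converting the integer and then
         scaling by 2^-fbits: e.g. "ucvtf s0, s1, #8" computes
         (F32)src * 1/256.  The FCVTZ{S,U} case below mirrors this by
         scaling by 2^+fbits before truncating to an integer. */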
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                       : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(tyI);
      IRTemp res = newTemp(tyF);
      IRTemp rm  = mk_get_IR_rounding_mode();
      assign(src, getQRegLane(nn, 0, tyI));
      assign(res, triop(opMUL, mkexpr(rm),
                        binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0));
      }
      putQRegLane(dd, 1, mkU64(0));
      const HChar ch = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
          ch, dd, ch, nn, fbits);
      return True;
   }
   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
      /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double scale = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                       : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(tyF);
      IRTemp res = newTemp(tyI);
      IRTemp rm  = newTemp(Ity_I32);
      assign(src, getQRegLane(nn, 0, tyF));
      assign(rm,  mkU32(Irrm_ZERO));
      assign(res, binop(opCVT, mkexpr(rm),
                        triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0));
      }
      putQRegLane(dd, 1, mkU64(0));
      const HChar ch = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          ch, dd, ch, nn, fbits);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     11 9 4
      01 U  11110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL -------- */ // 1
      /* -------- 0,1011  SQDMLSL -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
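      /* math_SQDMULL_ACC computes the widened, doubled product and, for
         ks != 0, the accumulate.  Note the doubling can itself saturate:
         e.g. for the h variant, 0x8000 * 0x8000 doubled is 2^31, which
         saturates to 0x7FFFFFFF and must set QC -- hence the two
         saturation-pair comparisons below. */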
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, %c%u\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
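      /* QC is driven by comparing the saturating and non-saturating
         results: they differ exactly when saturation occurred.  E.g.
         "uqadd b0, b1, b2" with 0xFF + 0x01 gives qres = 0xFF but
         nres = 0x00, so the difference sets QC. */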
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
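      /* a >= b is computed as NOT(b > a); the identity holds for both
         the signed (CMGE) and unsigned (CMHS) orderings, since each is
         a total order. */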
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }
   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,xx,01000 SSHL  d_d_d -------- */
      /* -------- 0,xx,01010 SRSHL d_d_d -------- */
      /* -------- 1,xx,01000 USHL  d_d_d -------- */
      /* -------- 1,xx,01010 URSHL d_d_d -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isR ? (isU ? "urshl" : "srshl")
                            : (isU ? "ushl"  : "sshl");
      DIP("%s %s, %s, %s\n", nm,
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool   isSUB = bitU == 1;
      IRTemp res   = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }
   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
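      // For reference: a correct FMULX would differ from FMUL only on the
      // special cases +/-0 * +/-Inf, where it returns +/-2.0 rather than
      // the default NaN.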
      IRType ity = size == X01 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, triop(mkMULF(ity),
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQRegLO(nn,ity), getQRegLO(mm,ity)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fmulx %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }
   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
      /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
      Bool   isD   = size == X01;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp res   = newTempV128();
      assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                       : binop(opCMP, getQReg128(nn), getQReg128(mm)));
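      /* VEX provides only EQ/LE/LT as FP vector comparisons, so GE is
         synthesised by swapping the operands: a >= b  <=>  b <= a.  The
         FCMGT and FACGE/FACGT cases below use the same trick with LT/LE. */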
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
      Bool   isD   = size == X11;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", "fcmgt",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }
   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
      /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
      Bool   isD   = (size & 1) == 1;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGT  = (size & 2) == 2;
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
                               unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
      /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
          c, dd, c, nn, c, mm);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 0  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(mm < 32 && nn < 32 && dd < 32);

   if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- xx,10000 SQRDMLAH s and h variants only -------- */
      /* -------- xx,10001 SQRDMLSH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isAdd = opcode == BITS5(1,0,0,0,0);

      IRTemp res, res_nosat, vD, vN, vM;
      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
      newTempsV128_3(&vD, &vN, &vM);
      assign(vD, getQReg128(dd));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));

      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat)));

      const HChar  arr = size == X01 ? 'h' : 's';
      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
      /* These are a bit tricky (to say the least).  See comments on
         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
         details. */
      Bool isUSQADD = bitU == 1;
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
      return True;
   }
   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }
   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
      Bool   isD     = size == X11;
      IRType ity     = isD ? Ity_F64 : Ity_F32;
      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp   opCmp   = Iop_INVALID;
      Bool   swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }
   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      if (size == X11) return False;
      vassert(size < 3);
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
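      /* Example of the scheme: "uqxtn b0, h1" with src = 0x0123 narrows
         to 0xFF (saturated); re-widening 0xFF gives 0x00FF != 0x0123, so
         the difference sets QC.  An in-range value re-widens to itself
         and leaves QC alone. */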
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }
   if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
      /* -------- 1,01,10110 FCVTXN s_d -------- */
      /* Using Irrm_NEAREST here isn't right.  The docs say "round to
         odd" but I don't know what that really means. */
      putQRegLO(dd,
                binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
                      getQRegLO(nn, Ity_F64)));
      putQRegLane(dd, 1, mkU32(0));
      putQRegLane(dd, 1, mkU64(0));
      DIP("fcvtxn s%u, d%u\n", dd, nn);
      return True;
   }
   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
      Bool   isD = (size & 1) == 1;
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
      IROp cvt = Iop_INVALID;
      if (bitU == 1) {
         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
      } else {
         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
      }
      IRTemp src = newTemp(tyF);
      IRTemp res = newTemp(tyI);
      assign(src, getQRegLane(nn, 0, tyF));
      assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
      putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
      HChar sOrD = isD ? 'd' : 's';
      DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
          sOrD, dd, sOrD, nn);
      return True;
   }
   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
      /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
      Bool   isU = bitU == 1;
      Bool   isD = (size & 1) == 1;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IROp   iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                       : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRTemp rm  = mk_get_IR_rounding_mode();
      putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
      HChar c = isD ? 'd' : 's';
      DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
      return True;
   }
   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
      /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
      Bool   isSQRT = bitU == 1;
      Bool   isD    = (size & 1) == 1;
      IROp   op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
                             : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
      IRTemp resV   = newTempV128();
      assign(resV, unop(op, getQReg128(nn)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(resV))));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
      return True;
   }

   if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
      Bool   isD = (size & 1) == 1;
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IROp   op  = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
      IRTemp res = newTemp(ty);
      IRTemp rm  = mk_get_IR_rounding_mode();
      assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLane(dd, 0, mkexpr(res));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23   21 20 19 15     11   9 4
      01 U 11111 size L  M  m  opcode H  0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
      /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
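      /* The lane index is assembled from H and L: two bits (H:L, lanes
         0..3) for the S variant, and just H (lanes 0..1) for the D
         variant; sz:L == x11 has no meaning and is rejected above. */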
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t2))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }
   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
      /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity  = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt   mm   = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t1))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }
   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }
   if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
      return True;
   }
   if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
      /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
      /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isAdd = opcode == BITS4(1,1,0,1);

      IRTemp res, res_nosat, vD, vN, vM;
      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vD, &vN);
      assign(vD, getQReg128(dd));
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);

      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);

      const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* -------- 0,00010 SSRA std7_std7_#imm -------- */
      /* -------- 1,00010 USRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (shift == lanebits && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (shift == lanebits) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(shift - nudge)));
      }
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
== BITS5(0,0,1,0,0) || opcode
== BITS5(0,0,1,1,0)) {
11115 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
11116 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
11117 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
11118 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
11119 /* laneTy, shift = case immh:immb of
11120 0001:xxx -> B, SHR:8-xxx
11121 001x:xxx -> H, SHR:16-xxxx
11122 01xx:xxx -> S, SHR:32-xxxxx
11123 1xxx:xxx -> D, SHR:64-xxxxxx
11128 Bool isQ
= bitQ
== 1;
11129 Bool isU
= bitU
== 1;
11130 Bool isAcc
= opcode
== BITS5(0,0,1,1,0);
11131 Bool ok
= getLaneInfo_IMMH_IMMB(&shift
, &size
, immh
, immb
);
11132 if (!ok
|| (bitQ
== 0 && size
== X11
)) return False
;
11133 vassert(size
>= 0 && size
<= 3);
11134 UInt lanebits
= 8 << size
;
11135 vassert(shift
>= 1 && shift
<= lanebits
);
11136 IROp op
= isU
? mkVecRSHU(size
) : mkVecRSHS(size
);
11137 IRExpr
* src
= getQReg128(nn
);
11138 IRTemp imm8
= newTemp(Ity_I8
);
11139 assign(imm8
, mkU8((UChar
)(-shift
)));
11140 IRExpr
* amt
= mkexpr(math_DUP_TO_V128(imm8
, Ity_I8
));
11141 IRTemp shf
= newTempV128();
11142 IRTemp res
= newTempV128();
11143 assign(shf
, binop(op
, src
, amt
));
11144 assign(res
, isAcc
? binop(mkVecADD(size
), getQReg128(dd
), mkexpr(shf
))
11146 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
11147 HChar laneCh
= "bhsd"[size
];
11148 UInt nLanes
= (isQ
? 128 : 64) / lanebits
;
11149 const HChar
* nm
= isAcc
? (isU
? "ursra" : "srsra")
11150 : (isU
? "urshr" : "srshr");
11151 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm
,
11152 nameQReg128(dd
), nLanes
, laneCh
,
11153 nameQReg128(nn
), nLanes
, laneCh
, shift
);
   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits) {
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp tmp = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      IROp    op  = mkVecSHLN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == 0) {
         assign(res, src);
      } else {
         assign(res, binop(op, src, mkU8(shift)));
         if (isSLI) {
            IRExpr* nmask = binop(mkVecSHRN(size),
                                  mkV128(0xFFFF), mkU8(lanebits - shift));
            IRTemp  tmp   = newTempV128();
            assign(tmp, binop(Iop_OrV128,
                              mkexpr(res),
                              binop(Iop_AndV128, getQReg128(dd), nmask)));
            res = tmp;
         }
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isSLI ? "sli" : "shl";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
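   /* Note on the SLI case above: dual to SRI.  A left shift vacates the
      low |shift| bits of each lane, so those are kept from Vd: here
      nmask is all-ones shifted right by lanebits-shift, e.g. 0x07 per
      8-bit lane for shift 3. */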
   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  std7_std7_#imm -------- */
      /* -------- 1,01110  UQSHL  std7_std7_#imm -------- */
      /* -------- 1,01100  SQSHLU std7_std7_#imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = newTempV128();
      assign(src, getQReg128(nn));
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
                                    isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
      return True;
   }
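   /* Note on the SQSHL/UQSHL/SQSHLU case above: math_QSHL_IMM returns,
      alongside the shifted result, a (qDiff1, qDiff2) pair which differ
      iff some lane saturated.  updateQCFLAGwithDifferenceZHI compares
      them, masking off the upper 64 bits (Iop_ZeroHI64ofV128) when only
      the lower half of the vector is architecturally in use. */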
   if (bitU == 0
       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- 0,10000 SHRN{,2} #imm -------- */
      /* -------- 0,10001 RSHRN{,2} #imm -------- */
      /* Narrows, and size is the narrow size. */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool isR   = opcode == BITS5(1,0,0,0,1);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1);
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      IRTemp t3 = newTempV128();
      assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
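   /* Note on the SHRN/RSHRN case above: rounding is performed in the
      wide lanes by adding 1 << (shift-1) before the right shift, so no
      precision is lost prior to narrowing. */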
   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN{,2} #imm -------- */
      /* -------- 1,10010   UQSHRN{,2} #imm -------- */
      /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";   op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";   op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn";  op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn";  op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun";  op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
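   /* Note on the SQSHRN et al case above: the QANDq*NNARROW primops
      return a 128-bit pair -- the narrowed result in the lower 64 bits
      and saturation residue in the upper 64.  The Q flag is updated by
      duplicating that upper half (InterleaveHI64x2 of the pair with
      itself) and comparing it against zero. */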
   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18    15     9 4
         0q0 011110 immh immb  101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb  101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta,Tb,sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
      Bool    isQ   = bitQ == 1;
      Bool    isU   = bitU == 1;
      UInt    immhb = (immh << 3) | immb;
      IRTemp  src   = newTempV128();
      IRTemp  zero  = newTempV128();
      IRExpr* res   = NULL;
      UInt    sh    = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      if (res == NULL) return False;
      putQReg128(dd, res);
      DIP("%cshll%s %s.%s, %s.%s, #%u\n",
          isU ? 'u' : 's', isQ ? "2" : "",
          nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
      return True;
   }
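   /* Note on the SSHLL/USHLL case above: widening is done by
      interleaving the source with zero, which parks each narrow lane in
      the top half of a wide lane; a signed (sshll) or unsigned (ushll)
      right shift by widelanebits-sh then performs the sign/zero
      extension and the left shift in a single operation. */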
   if (opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                       : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyI);
         IRTemp res = newTemp(tyF);
         IRTemp rm  = mk_get_IR_rounding_mode();
         assign(src, getQRegLane(nn, i, tyI));
         assign(res, triop(opMUL, mkexpr(rm),
                                  binop(opCVT, mkexpr(rm), mkexpr(src)),
                                  scaleE));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }
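   /* Note on the SCVTF/UCVTF case above: the fixed-point conversion is
      an int-to-FP convert followed by a scaling multiply by the
      constant 2^-fbits, both done under the current rounding mode. */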
   if (opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                       : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyF);
         IRTemp res = newTemp(tyI);
         IRTemp rm  = newTemp(Ity_I32);
         assign(src, getQRegLane(nn, i, tyF));
         assign(rm,  mkU32(Irrm_ZERO));
         assign(res, binop(opCVT, mkexpr(rm),
                                  triop(opMUL, mkexpr(rm),
                                               mkexpr(src), scaleE)));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }
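   /* Note on the FCVTZS/FCVTZU case above: the value is first scaled up
      by 2^+fbits and then converted with the rounding mode forced to
      zero (truncation), as the Z in the mnemonic requires. */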
   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;
   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool isU   = bitU == 1;
      Bool isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res  = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
                                     : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
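   /* Note on the widening cases in this function:
      math_WIDEN_LO_OR_HI_LANES picks the low (base form) or high ("2"
      form) half of the 128-bit source and sign- or zero-extends each
      lane to double width, so the actual add/sub happens at size+1. */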
   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool isU   = bitU == 1;
      Bool isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res  = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
                                     : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
                              : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }
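   /* Note on the ADDHN/RADDHN/SUBHN/RSUBHN case above: the optional
      rounding adds 1 << (shift[size]-1) in the wide lanes; the high
      halves are then exposed by a wide right shift and compacted into
      the low 64 bits with mkVecCATEVENLANES. */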
   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd  = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res  = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
                                     : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100  UMULL{2} -------- */ // 0
      /* -------- 0,1000  SMLAL{2} -------- */ // 1
      /* -------- 1,1000  UMLAL{2} -------- */ // 1
      /* -------- 0,1010  SMLSL{2} -------- */ // 2
      /* -------- 1,1010  UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool isU = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
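   /* Note on the {S,U}MULL/MLAL/MLSL case above: ks selects the
      accumulate mode (0 = plain multiply, 1 = add, 2 = subtract), and
      "mas"[ks] passes the matching 'm'/'a'/'s' tag to math_MULL_ACC. */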
   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n
         = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
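   /* Note on the SQDMULL/SQDMLAL/SQDMLSL case above: saturation can
      occur both in the doubling multiply (sat1q vs sat1n) and, for the
      accumulating forms, in the add/sub step (sat2q vs sat2n); each
      pair that exists contributes to the QC flag. */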
   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110  PMULL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size != X00 && size != X11) return False;
      IRTemp  res  = IRTemp_INVALID;
      IRExpr* srcN = getQReg128(nn);
      IRExpr* srcM = getQReg128(mm);
      const HChar* arrNarrow = NULL;
      const HChar* arrWide   = NULL;
      if (size == X00) {
         res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                         srcN, srcM);
         arrNarrow = nameArr_Q_SZ(bitQ, size);
         arrWide   = nameArr_Q_SZ(1,    size+1);
      } else {
         /* The same thing as the X00 case, except we have to call
            a helper to do it. */
         vassert(size == X11);
         res = newTemp(Ity_V128);
         IROp slice
            = is2 ? Iop_V128HIto64 : Iop_V128to64;
         IRExpr** args
            = mkIRExprVec_3( IRExpr_VECRET(),
                             unop(slice, srcN), unop(slice, srcM));
         IRDirty* di
            = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                 "arm64g_dirtyhelper_PMULLQ",
                                 &arm64g_dirtyhelper_PMULLQ, args);
         stmt(IRStmt_Dirty(di));
         /* We can't use nameArr_Q_SZ for this because it can't deal with
            Q-sized (128 bit) results.  Hence do it by hand. */
         arrNarrow = bitQ == 0 ? "1d" : "2d";
         arrWide   = "1q";
      }
      putQReg128(dd, mkexpr(res));
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
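   /* Note on the PMULL{2} case above: the 8x8->16 polynomial multiply
      maps directly onto Iop_PolynomialMull8x8, but the 64x64->128 form
      has no IR primop and so is routed through a dirty helper
      (arm64g_dirtyhelper_PMULLQ) applied to the selected 64-bit
      halves. */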
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
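   /* Note on the SHADD/UHADD/SHSUB/UHSUB case above: each operand is
      widened to double-width lanes so the add/sub cannot overflow; the
      halving is then a one-bit right shift (arithmetic when signed,
      logical when unsigned) before narrowing back, giving (a +/- b)>>1
      exactly. */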
   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }
   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
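   /* Note on the EOR/BSL/BIT/BIF case above: the three selects all use
      the identity x ^ ((x ^ y) & m), which yields y where m is 1 and x
      where m is 0.  BSL selects between N and M under D as the mask,
      BIT inserts N into D under M, and BIF does the same under ~M. */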
   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
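   /* Note on the CMGE/CMHS case above: the IR only provides greater-than
      comparisons, so a >= b is computed as NOT(b > a) with the operands
      swapped. */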
   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl"  : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isMAX ? (isU ? "umax" : "smax")
                               : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isSUB = bitU == 1;
      IROp op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS    = bitU == 1;
      IROp opMUL    = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }
   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }
   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
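   /* Note on the pairwise cases (SMAXP/UMAXP/SMINP/UMINP, and ADDP
      below): concatenating the even lanes of [M:N] with the odd lanes
      of [M:N] lines up each adjacent pair vertically, so the pairwise
      operation becomes an ordinary lanewise one. */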
   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE  = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1 = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
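   /* Note on the FP compares above and below: the IR supplies only EQ,
      LE and LT vector forms, so FCMGE/FCMGT (and FACGE/FACGT) swap
      their operands and use LE/LT instead. */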
   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                        : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15 14     10 9 4
      0  Q  U  01110 size 0  m  1  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(mm < 32 && nn < 32 && dd < 32);
   if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
      /* -------- 1,xx,0000 SQRDMLAH s and h variants only -------- */
      /* -------- 1,xx,0001 SQRDMLSH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isAdd = opcode == BITS4(0,0,0,0);

      IRTemp res, res_nosat, vD, vN, vM;
      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
      newTempsV128_3(&vD, &vN, &vM);
      assign(vD, getQReg128(dd));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));

      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));

      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
12662 if (opcode
== BITS5(0,0,0,1,0) || opcode
== BITS5(0,0,1,1,0)) {
12663 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
12664 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
12665 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
12666 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
12667 /* Widens, and size refers to the narrow size. */
12668 if (size
== X11
) return False
; // no 1d or 2d cases
12669 Bool isU
= bitU
== 1;
12670 Bool isACC
= opcode
== BITS5(0,0,1,1,0);
12671 IRTemp src
= newTempV128();
12672 IRTemp sum
= newTempV128();
12673 IRTemp res
= newTempV128();
12674 assign(src
, getQReg128(nn
));
12676 binop(mkVecADD(size
+1),
12677 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12678 isU
, True
/*fromOdd*/, size
, mkexpr(src
))),
12679 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12680 isU
, False
/*!fromOdd*/, size
, mkexpr(src
)))));
12681 assign(res
, isACC
? binop(mkVecADD(size
+1), mkexpr(sum
), getQReg128(dd
))
12683 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
12684 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, size
);
12685 const HChar
* arrWide
= nameArr_Q_SZ(bitQ
, size
+1);
12686 DIP("%s %s.%s, %s.%s\n", isACC
? (isU
? "uadalp" : "sadalp")
12687 : (isU
? "uaddlp" : "saddlp"),
12688 nameQReg128(dd
), arrWide
, nameQReg128(nn
), arrNarrow
);
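   /* Illustrative example for the pairwise widening adds above, not
      part of the decoder.  For SADDLP Vd.4H, Vn.8B with
         Vn.8B = [a7 a6 a5 a4 a3 a2 a1 a0]
      the odd-lane widening gives  [sx(a7) sx(a5) sx(a3) sx(a1)] and
      the even-lane widening gives [sx(a6) sx(a4) sx(a2) sx(a0)],
      where sx() is sign extension to 16 bits (zero extension for the
      UADDLP/UADALP forms).  Adding the two with mkVecADD(size+1)
      yields exactly the required pairwise sums
         [a7+a6 a5+a4 a3+a2 a1+a0]. */
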
   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

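   /* Sketch of the argument discipline above, assuming the usual
      reading of the SUQADD/USQADD semantics: for SUQADD the Vn lanes
      are treated as unsigned and the Vd lanes as signed, with signed
      saturation (USQADD is the mirror image).  That is why argL must
      be Vn and argR must be Vd -- the primop extends its two operands
      with different signedness, so swapping them would compute a
      different function -- and why the qop selection looks "switched"
      relative to the mnemonics. */
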
   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128,
                       getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101  RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isNEG  = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT  = bitU == 0;
      IRExpr* argL  = getQReg128(nn);
      IRExpr* argR  = mkV128(0x0000);
      IRTemp  res   = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

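   /* Note on the comparison trick above: VEX only provides a signed
      greater-than primop, so CMGE Vd, Vn, #0 (lane >= 0) is computed
      as NOT(0 >s lane).  For example, a lane holding -3 gives
      0 >s -3 == all-ones, and the NOT produces all-zeroes, correctly
      reporting -3 >= 0 as false. */
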
   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

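   /* As used above and throughout: mkV128(0x0000) builds a 128-bit
      constant from a 16-bit mask, one mask bit per byte, so 0x0000 is
      the all-zeroes vector.  NEG is then just a vector subtract of
      the operand from that zero vector. */
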
   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD     = size == X11;
      IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp opCmp   = Iop_INVALID;
      Bool swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, #0.0\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

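   /* Note on the 'swap' flag above: VEX provides FP vector compares
      only for EQ, LE and LT, so the GT/GE forms are synthesised by
      swapping the operands: x > 0.0 is computed as 0.0 < x, and
      x >= 0.0 as 0.0 <= x.  Unlike the integer cases, a NOT-based
      fallback would be wrong here, since negating an FP comparison
      would report unordered (NaN) lanes as true. */
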
   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isFNEG = bitU == 1;
      IROp op     = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                           : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res  = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool   is2  = bitQ == 1;
      IROp   opN  = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm        = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool  is2    = bitQ == 1;
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

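   /* Illustrative example of the saturation check above, not part of
      the decoder.  The narrowed result is re-widened (signedly for
      sqxtn, otherwise zero-widened) and compared against the source;
      a lane that saturated cannot survive the round trip.  For SQXTN
      8B,8H a source lane 0x1234 narrows to the saturated value 0x7F,
      which re-widens to 0x007F != 0x1234, so QC gets set, whereas
      0x0042 narrows to 0x42 and re-widens to 0x0042, leaving QC
      unchanged. */
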
   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2   = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }

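   /* Worked example for the SHLL implementation above, not part of
      the decoder.  SHLL Vd.8H, Vn.8B, #8 must zero-extend each byte
      and shift it left by the lane width.  Interleaving the source
      with itself duplicates each byte: [.. a1 a0] -> [.. a1 a1 a0 a0].
      Viewed as 16-bit lanes each pair is (a << 8) | a, so the
      subsequent SHLN by 8 on the widened lanes leaves exactly
      (a << 8), i.e. the zero-extended byte shifted by the lane
      width. */
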
   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
      IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
      IRTemp rm     = mk_get_IR_rounding_mode();
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, nLanes * bitQ + i,
                     binop(opCvt, mkexpr(rm), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

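   /* Note on the destination laneage above: for the narrowing "2"
      form (bitQ == 1) the converted lanes are written to the upper
      half of Vd (lane numbers nLanes * bitQ + i) and the lower half
      is left untouched, while for the non-"2" form they land in the
      lower half and the upper 64 bits are explicitly zeroed. */
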
   if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
      /* Using Irrm_NEAREST here isn't right.  The docs say "round to
         odd" but I don't know what that really means. */
      IRType srcTy = Ity_F64;
      IROp   opCvt = Iop_F64toF32;
      IRTemp src[2];
      for (UInt i = 0; i < 2; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < 2; i++) {
         putQRegLane(dd, 2 * bitQ + i,
                     binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
      IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
      ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
      // = 1 + bitU[0]:size[1]:opcode[0]
      vassert(ix >= 1 && ix <= 8);
      if (ix == 7) ix = 0;
   }
   if (ix > 0) {
      /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
      /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
      /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
      /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
      /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
      /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
      /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
      /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
      /* rm plan:
         FRINTN: tieeven -- !! FIXME KLUDGED !!
         FRINTM: -inf
         FRINTP: +inf
         FRINTZ: zero
         FRINTA: tieaway -- !! FIXME KLUDGED !!
         FRINTX: per FPCR + "exact = TRUE"
         FRINTI: per FPCR
      */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRTemp irrmRM = mk_get_IR_rounding_mode();

      HChar  ch   = '?';
      IRTemp irrm = newTemp(Ity_I32);
      switch (ix) {
         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         // I am unsure about the following, due to the "integral exact"
         // description in the manual.  What does it mean? (frintx, that is)
         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
         default: vassert(0);
      }

      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                     getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                     getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("frint%c %s.%s, %s.%s\n", ch,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

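   /* Worked example of the ix encoding above, not part of the
      decoder: ix = 1 + bitU[0]:size[1]:opcode[0].  FRINTP is encoded
      as 0,1x,11000, giving ix = 1 + 0b010 = 3 as per the table, and
      the unassigned encoding 1,1x,11000 gives ix = 1 + 0b110 = 7,
      which is mapped back to 0 so that it fails to decode. */
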
   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
      IROp cvt = Iop_INVALID;
      if (bitU == 1) {
         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
      } else {
         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
      }
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                     getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                     getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
      Bool isREC = bitU == 0;
      IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF -------- */
      /* -------- 1,0x,11101: UCVTF -------- */
      /* 31  28      22 21       15     9 4
         0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
         0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
         with laneage:
         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isF64 = (size & 1) == 1;
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool   ok  = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                       isQ, isF64);
         IROp   iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                          : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm  = mk_get_IR_rounding_mode();
         UInt   i;
         vassert(ok); /* the 'if' above should ensure this */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }

   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isSQRT = bitU == 1;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isD = (size & 1) == 1;
      IROp op  = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
                         getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", "fsqrt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21 20 19 15     11 9 4
      0 Q U 01111 size L  M  m  opcode H 0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
          isD ? 'd' : 's', index);
      return True;
   }

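   /* Worked example of the index decoding above, not part of the
      decoder.  The element register number is always M:mmLO4; the
      lane index is assembled from H and L as the element size allows.
      For FMLA Vd.4S, Vn.4S, Vm.S[3] the index 3 is encoded as
      H:L = 0b11; for D lanes only H is usable (index 0 or 1), and
      sz:L == x11 has no allocated encoding, hence the return False. */
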
   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity  = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt   mm   = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd = math_DUP_TO_V128(elem, ity);
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      IRTemp res  = newTempV128();
      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
          isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
      return True;
   }

   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm    = 32; // invalid
      UInt ix    = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IROp   opMUL = mkVecMUL(size);
      IROp   opADD = mkVecADD(size);
      IROp   opSUB = mkVecSUB(size);
      HChar  ch    = size == X01 ? 'h' : 's';
      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD  = newTempV128();
      IRTemp vecN  = newTempV128();
      IRTemp res   = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                                : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,0,1,0)
       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,0): ks = 0; break;
         case BITS4(0,0,1,0): ks = 1; break;
         case BITS4(0,1,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool isU = bitU == 1;
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN = newTempV128();
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      HChar ch               = size == X01 ? 'h' : 's';
      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      HChar ch               = size == X01 ? 'h' : 's';
      DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      HChar ch         = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
      /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
      /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);

      IRTemp res, res_nosat, vD, vN, vM;
      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vD, &vN);
      assign(vD, getQReg128(dd));
      assign(vN, getQReg128(nn));

      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      Bool isAdd = opcode == BITS4(1,1,0,1);
      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));

      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
      HChar ch         = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23   21    16     11 9 4
      0100 1110 size 10100 opcode 10 n d
      Decode fields are: size,opcode
      Size is always 00 in ARMv8, it appears.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
      /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
      /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
      Bool   isD  = opcode == BITS5(0,0,1,0,1);
      IRTemp op1  = newTemp(Ity_V128);
      IRTemp op2  = newTemp(Ity_V128);
      IRTemp xord = newTemp(Ity_V128);
      IRTemp res  = newTemp(Ity_V128);
      void*        helper = isD ? &arm64g_dirtyhelper_AESD
                                : &arm64g_dirtyhelper_AESE;
      const HChar* hname  = isD ? "arm64g_dirtyhelper_AESD"
                                : "arm64g_dirtyhelper_AESE";
      assign(op1, getQReg128(dd));
      assign(op2, getQReg128(nn));
      assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(xord)),
                                 unop(Iop_V128to64, mkexpr(xord)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
      /* -------- 00,00110: AESMC  Vd.16b, Vn.16b -------- */
      /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
      Bool   isI = opcode == BITS5(0,0,1,1,1);
      IRTemp src = newTemp(Ity_V128);
      IRTemp res = newTemp(Ity_V128);
      void*        helper = isI ? &arm64g_dirtyhelper_AESIMC
                                : &arm64g_dirtyhelper_AESMC;
      const HChar* hname  = isI ? "arm64g_dirtyhelper_AESIMC"
                                : "arm64g_dirtyhelper_AESMC";
      assign(src, getQReg128(nn));
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(src)),
                                 unop(Iop_V128to64, mkexpr(src)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   return False;
#  undef INSN
}

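/* Note on the dirty-helper pattern used above (and in the SHA cases
   below): the crypto primitives are not expanded into IR but are
   implemented as calls to C helper functions.  Each helper returns
   its 128-bit result through a V128* out-parameter, declared to the
   IR as IRExpr_VECRET(), and the 128-bit inputs are passed as pairs
   of 64-bit halves split with Iop_V128HIto64/Iop_V128to64, since
   dirty-helper arguments must be scalar. */
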
static
Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23 21 20 15 14  11 9 4
      0101 1110 sz 0  m  0  opc 00 n d
      Decode fields are: sz,opc
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
       || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt sz  = INSN(23,22);
   UInt mm  = INSN(20,16);
   UInt opc = INSN(14,12);
   UInt nn  = INSN(9,5);
   UInt dd  = INSN(4,0);
   if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
      /* -------- 00,000 SHA1C     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,001 SHA1P     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,010 SHA1M     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,011 SHA1SU0   Vd.4S, Vn.4S, Vm.4S -------- */
      /* -------- 00,100 SHA256H   Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,101 SHA256H2  Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
      vassert(opc < 7);
      const HChar* inames[7]
         = { "sha1c", "sha1p", "sha1m", "sha1su0",
             "sha256h", "sha256h2", "sha256su1" };
      void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
         = { &arm64g_dirtyhelper_SHA1C,   &arm64g_dirtyhelper_SHA1P,
             &arm64g_dirtyhelper_SHA1M,   &arm64g_dirtyhelper_SHA1SU0,
             &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
             &arm64g_dirtyhelper_SHA256SU1 };
      const HChar* hnames[7]
         = { "arm64g_dirtyhelper_SHA1C",   "arm64g_dirtyhelper_SHA1P",
             "arm64g_dirtyhelper_SHA1M",   "arm64g_dirtyhelper_SHA1SU0",
             "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
             "arm64g_dirtyhelper_SHA256SU1" };
      IRTemp vD      = newTemp(Ity_V128);
      IRTemp vN      = newTemp(Ity_V128);
      IRTemp vM      = newTemp(Ity_V128);
      IRTemp vDhi    = newTemp(Ity_I64);
      IRTemp vDlo    = newTemp(Ity_I64);
      IRTemp vNhiPre = newTemp(Ity_I64);
      IRTemp vNloPre = newTemp(Ity_I64);
      IRTemp vNhi    = newTemp(Ity_I64);
      IRTemp vNlo    = newTemp(Ity_I64);
      IRTemp vMhi    = newTemp(Ity_I64);
      IRTemp vMlo    = newTemp(Ity_I64);
      assign(vD,      getQReg128(dd));
      assign(vN,      getQReg128(nn));
      assign(vM,      getQReg128(mm));
      assign(vDhi,    unop(Iop_V128HIto64, mkexpr(vD)));
      assign(vDlo,    unop(Iop_V128to64,   mkexpr(vD)));
      assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
      assign(vNloPre, unop(Iop_V128to64,   mkexpr(vN)));
      assign(vMhi,    unop(Iop_V128HIto64, mkexpr(vM)));
      assign(vMlo,    unop(Iop_V128to64,   mkexpr(vM)));
      /* Mask off any bits of the N register operand that aren't actually
         needed, so that Memcheck doesn't complain unnecessarily. */
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            assign(vNhi, mkU64(0));
            assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
            break;
         case BITS3(0,1,1): case BITS3(1,0,0):
         case BITS3(1,0,1): case BITS3(1,1,0):
            assign(vNhi, mkexpr(vNhiPre));
            assign(vNlo, mkexpr(vNloPre));
            break;
         default:
            vassert(0);
      }
      IRTemp res = newTemp(Ity_V128);
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
                              mkIRExprVec_7(
                                 IRExpr_VECRET(),
                                 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
                                 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(0,1,1): case BITS3(1,1,0):
            DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(1,0,0): case BITS3(1,0,1):
            DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         default:
            vassert(0);
      }
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23 21    16  11 9 4
      0101 1110 sz 10100 opc 10 n d
      Decode fields are: sz,opc
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt sz  = INSN(23,22);
   UInt opc = INSN(16,12);
   UInt nn  = INSN(9,5);
   UInt dd  = INSN(4,0);
   if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
      /* -------- 00,00000 SHA1H     Sd,    Sn    -------- */
      /* -------- 00,00001 SHA1SU1   Vd.4S, Vn.4S -------- */
      /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
      vassert(opc < 3);
      const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
      IRTemp vD   = newTemp(Ity_V128);
      IRTemp vN   = newTemp(Ity_V128);
      IRTemp vDhi = newTemp(Ity_I64);
      IRTemp vDlo = newTemp(Ity_I64);
      IRTemp vNhi = newTemp(Ity_I64);
      IRTemp vNlo = newTemp(Ity_I64);
      assign(vD,   getQReg128(dd));
      assign(vN,   getQReg128(nn));
      assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
      assign(vDlo, unop(Iop_V128to64,   mkexpr(vD)));
      assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
      assign(vNlo, unop(Iop_V128to64,   mkexpr(vN)));
      /* Mask off any bits of the N register operand that aren't actually
         needed, so that Memcheck doesn't complain unnecessarily.  Also
         construct the calls, given that the helper functions don't take
         the same number of arguments. */
      IRDirty* di  = NULL;
      IRTemp   res = newTemp(Ity_V128);
      switch (opc) {
         case BITS5(0,0,0,0,0): {
            IRExpr* vNloMasked = unop(Iop_32Uto64,
                                      unop(Iop_64to32, mkexpr(vNlo)));
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA1H",
                                    &arm64g_dirtyhelper_SHA1H,
                                    mkIRExprVec_3(
                                       IRExpr_VECRET(),
                                       mkU64(0), vNloMasked) );
            break;
         }
         case BITS5(0,0,0,0,1):
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA1SU1",
                                    &arm64g_dirtyhelper_SHA1SU1,
                                    mkIRExprVec_5(
                                       IRExpr_VECRET(),
                                       mkexpr(vDhi), mkexpr(vDlo),
                                       mkexpr(vNhi), mkexpr(vNlo)) );
            break;
         case BITS5(0,0,0,1,0):
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA256SU0",
                                    &arm64g_dirtyhelper_SHA256SU0,
                                    mkIRExprVec_5(
                                       IRExpr_VECRET(),
                                       mkexpr(vDhi), mkexpr(vDlo),
                                       mkexpr(vNhi), mkexpr(vNlo)) );
            break;
         default:
            vassert(0);
      }
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      switch (opc) {
         case BITS5(0,0,0,0,0):
            DIP("%s s%u, s%u\n", inames[opc], dd, nn);
            break;
         case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
            DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
            break;
         default:
            vassert(0);
      }
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 13   9 4
      000 11110 ty 1  m  op 1000 n opcode2
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op,opcode2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
      return False;
   }
   UInt ty      = INSN(23,22);
   UInt mm      = INSN(20,16);
   UInt op      = INSN(15,14);
   UInt nn      = INSN(9,5);
   UInt opcode2 = INSN(4,0);
   vassert(ty < 4);

   if (ty <= X01 && op == X00
       && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
      /* -------- 0x,00,00000 FCMP  d_d,  s_s -------- */
      /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
      /* -------- 0x,00,10000 FCMPE d_d,  s_s -------- */
      /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
      /*
         000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
         000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
         000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
         000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0

         000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
         000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
         000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
         000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0

         FCMPE generates Invalid Operation exn if either arg is any kind
         of NaN.  FCMP generates Invalid Operation exn if either arg is a
         signalling NaN.  We ignore this detail here and produce the same
         IR for both.
      */
      Bool   isD     = (ty & 1) == 1;
      Bool   isCMPE  = (opcode2 & 16) == 16;
      Bool   cmpZero = (opcode2 & 8) == 8;
      IRType ity     = isD ? Ity_F64 : Ity_F32;
      Bool   valid   = True;
      if (cmpZero && mm != 0) valid = False;
      if (valid) {
         IRTemp argL  = newTemp(ity);
         IRTemp argR  = newTemp(ity);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getQRegLO(nn, ity));
         assign(argR,
                cmpZero
                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
                   : getQRegLO(mm, ity));
         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                             mkexpr(argL), mkexpr(argR)));
         IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
         IRTemp nzcv_28x0 = newTemp(Ity_I64);
         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
         setFlags_COPY(nzcv_28x0);
         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
             cmpZero ? "#0.0" : nameQRegLO(mm, ity));
         return True;
      }
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15   11 9 4  3
      000 11110 ty 1  m  cond 01 n op nzcv
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt cond = INSN(15,12);
   UInt nn   = INSN(9,5);
   UInt op   = INSN(4,4);
   UInt nzcv = INSN(3,0);
   vassert(ty < 4 && op <= 1);

   if (ty <= BITS2(0,1)) {
      /* -------- 00,0 FCCMP  s_s -------- */
      /* -------- 00,1 FCCMPE s_s -------- */
      /* -------- 01,0 FCCMP  d_d -------- */
      /* -------- 01,1 FCCMPE d_d -------- */

      /* FCCMPE generates Invalid Operation exn if either arg is any kind
         of NaN.  FCCMP generates Invalid Operation exn if either arg is a
         signalling NaN.  We ignore this detail here and produce the same
         IR for both.
      */
      Bool   isD    = (ty & 1) == 1;
      Bool   isCMPE = op == 1;
      IRType ity    = isD ? Ity_F64 : Ity_F32;
      IRTemp argL   = newTemp(ity);
      IRTemp argR   = newTemp(ity);
      IRTemp irRes  = newTemp(Ity_I32);
      assign(argL,  getQRegLO(nn, ity));
      assign(argR,  getQRegLO(mm, ity));
      assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                          mkexpr(argL), mkexpr(argR)));
      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
      IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);

      IRTemp nzcvT_28x0 = newTemp(Ity_I64);
      assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));

      IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);

      IRTemp nzcv_28x0 = newTemp(Ity_I64);
      assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
                                   mkexpr(nzcvT_28x0), nzcvF_28x0));
      setFlags_COPY(nzcv_28x0);
      DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
          nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23 21 20 15   11 9 5
      000 11110 ty 1  m  cond 11 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
       || INSN(11,10) != BITS2(1,1)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt cond = INSN(15,12);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);
   if (ty <= X01) {
      /* -------- 00: FCSEL s_s -------- */
      /* -------- 01: FCSEL d_d -------- */
      IRType ity  = ty == X01 ? Ity_F64 : Ity_F32;
      IRTemp srcT = newTemp(ity);
      IRTemp srcF = newTemp(ity);
      IRTemp res  = newTemp(ity);
      assign(srcT, getQRegLO(nn, ity));
      assign(srcF, getQRegLO(mm, ity));
      assign(res, IRExpr_ITE(
                     unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                     mkexpr(srcT), mkexpr(srcF)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fcsel %s, %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
          nameCC(cond));
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20     14    9 4
      000 11110 ty 1  opcode 10000 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt opcode = INSN(20,15);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
      /* -------- 0x,000000: FMOV  d_d, s_s -------- */
      /* -------- 0x,000001: FABS  d_d, s_s -------- */
      /* -------- 0x,000010: FNEG  d_d, s_s -------- */
      /* -------- 0x,000011: FSQRT d_d, s_s -------- */
      IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
      IRTemp src = newTemp(ity);
      IRTemp res = newTemp(ity);
      const HChar* nm = "??";
      assign(src, getQRegLO(nn, ity));
      switch (opcode) {
         case BITS6(0,0,0,0,0,0):
            nm = "fmov"; assign(res, mkexpr(src)); break;
         case BITS6(0,0,0,0,0,1):
            nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,0):
            nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,1):
            nm = "fsqrt";
            assign(res, binop(mkSQRTF(ity),
                              mkexpr(mk_get_IR_rounding_mode()),
                              mkexpr(src))); break;
         default:
            vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

   if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,0)))) {
      /* -------- 11,000100: FCVT s_h -------- */
      /* -------- 11,000101: FCVT d_h -------- */
      /* -------- 00,000111: FCVT h_s -------- */
      /* -------- 00,000101: FCVT d_s -------- */
      /* -------- 01,000111: FCVT h_d -------- */
      /* -------- 01,000100: FCVT s_d -------- */
      /* 31        23 21    16 14    9 4
         000 11110 11 10001 00 10000 n d   FCVT Sd, Hn
         --------- 11 ----- 01 ---------   FCVT Dd, Hn
         --------- 00 ----- 11 ---------   FCVT Hd, Sn
         --------- 00 ----- 01 ---------   FCVT Dd, Sn
         --------- 01 ----- 11 ---------   FCVT Hd, Dn
         --------- 01 ----- 00 ---------   FCVT Sd, Dn
         Rounding, when dst is smaller than src, is per the FPCR.
      */
      UInt b2322 = ty;
      UInt b1615 = opcode & BITS2(1,1);
      switch ((b2322 << 2) | b1615) {
         case BITS4(0,0,0,1):   // S -> D
         case BITS4(1,1,0,1): { // H -> D
            Bool   srcIsH = b2322 == BITS2(1,1);
            IRType srcTy  = srcIsH ? Ity_F16 : Ity_F32;
            IRTemp res    = newTemp(Ity_F64);
            assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
                             getQRegLO(nn, srcTy)));
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
            return True;
         }
         case BITS4(0,1,0,0):   // D -> S
         case BITS4(0,1,1,1): { // D -> H
            Bool   dstIsH = b1615 == BITS2(1,1);
            IRType dstTy  = dstIsH ? Ity_F16 : Ity_F32;
            IRTemp res    = newTemp(dstTy);
            assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
                              mkexpr(mk_get_IR_rounding_mode()),
                              getQRegLO(nn, Ity_F64)));
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
            return True;
         }
         case BITS4(0,0,1,1):   // S -> H
         case BITS4(1,1,0,0): { // H -> S
            Bool   toH   = b1615 == BITS2(1,1);
            IRType srcTy = toH ? Ity_F32 : Ity_F16;
            IRType dstTy = toH ? Ity_F16 : Ity_F32;
            IRTemp res   = newTemp(dstTy);
            if (toH) {
               assign(res, binop(Iop_F32toF16,
                                 mkexpr(mk_get_IR_rounding_mode()),
                                 getQRegLO(nn, srcTy)));
            } else {
               assign(res, unop(Iop_F16toF32,
                                getQRegLO(nn, srcTy)));
            }
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
            return True;
         }
         default:
            break;
      }
      /* else unhandled */
   }

   if (ty <= X01
       && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
       && opcode != BITS6(0,0,1,1,0,1)) {
      /* -------- 0x,001000 FRINTN d_d, s_s -------- */
      /* -------- 0x,001001 FRINTP d_d, s_s -------- */
      /* -------- 0x,001010 FRINTM d_d, s_s -------- */
      /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
      /* -------- 0x,001100 FRINTA d_d, s_s -------- */
      /* -------- 0x,001110 FRINTX d_d, s_s -------- */
      /* -------- 0x,001111 FRINTI d_d, s_s -------- */
      /* 31        23 21   17  14    9 4
         000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
         x==0 => S-registers, x==1 => D-registers
         rm (17:15) encodings:
            111 per FPCR (FRINTI)
            001 +inf (FRINTP)
            010 -inf (FRINTM)
            011 zero (FRINTZ)
            000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
            100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
            110 per FPCR + "exact = TRUE" (FRINTX)
            101 unallocated
      */
      Bool    isD   = (ty & 1) == 1;
      UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;
      HChar   ch    = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         // I am unsure about the following, due to the "integral exact"
         // description in the manual.  What does it mean? (frintx, that is)
         case BITS3(1,1,0):
            ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         case BITS3(1,1,1):
            ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         // The following is a kludge.  There's no Irrm_ value to represent
         // this ("to nearest, with ties to even")
         case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ity);
         IRTemp dst = newTemp(ity);
         assign(src, getQRegLO(nn, ity));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
         return True;
      }
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15     11 9 4
      000 11110 ty 1  m  opcode 10 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty, opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
      /* ------- 0x,0000: FMUL d_d, s_s ------- */
      /* ------- 0x,0001: FDIV d_d, s_s ------- */
      /* ------- 0x,0010: FADD d_d, s_s ------- */
      /* ------- 0x,0011: FSUB d_d, s_s ------- */
      /* ------- 0x,0100: FMAX d_d, s_s ------- */
      /* ------- 0x,0101: FMIN d_d, s_s ------- */
      /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
      /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
      IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop = Iop_INVALID;
      const HChar* nm = "???";
      switch (opcode) {
         case BITS4(0,0,0,0): nm = "fmul";   iop = mkMULF(ity); break;
         case BITS4(0,0,0,1): nm = "fdiv";   iop = mkDIVF(ity); break;
         case BITS4(0,0,1,0): nm = "fadd";   iop = mkADDF(ity); break;
         case BITS4(0,0,1,1): nm = "fsub";   iop = mkSUBF(ity); break;
         case BITS4(0,1,0,0): nm = "fmax";   iop = mkVecMAXF(ty+2); break;
         case BITS4(0,1,0,1): nm = "fmin";   iop = mkVecMINF(ty+2); break;
         case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
         case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
         default: vassert(0);
      }
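      /* Illustration of the FMAXNM/FMINNM kludge flagged above
         (editorial, hedged): architecturally fmaxnm(qNaN, 5.0) returns
         5.0, but since fmaxnm is mapped to the same max operation as
         fmax, a NaN operand presumably propagates instead of being
         suppressed. */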
      if (opcode <= BITS4(0,0,1,1)) {
         // This is really not good code.  TODO: avoid width-changing
         IRTemp res = newTemp(ity);
         assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, ity), getQRegLO(mm, ity)));
         putQReg128(dd, mkV128(0));
         putQRegLO(dd, mkexpr(res));
      } else {
         putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
                             binop(iop, getQReg128(nn), getQReg128(mm))));
      }
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }
   if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
      /* ------- 0x,1000: FNMUL d_d, s_s ------- */
      IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop  = mkMULF(ity);
      IROp   iopn = mkNEGF(ity);
      const HChar* nm = "fnmul";
      IRExpr* resE = unop(iopn,
                          triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
      IRTemp res = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 14 9 4
      000 11111 ty o1 m  o0 a  n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,o1,o0
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
      return False;
   }
   UInt ty    = INSN(23,22);
   UInt bitO1 = INSN(21,21);
   UInt mm    = INSN(20,16);
   UInt bitO0 = INSN(15,15);
   UInt aa    = INSN(14,10);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (ty <= X01) {
      /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
      /* -------------------- F{N}M{ADD,SUB} -------------------- */
      /* 31          22 20 15 14 9 4   ix
         000 11111 0 sz 0 m  0  a  n d  0   FMADD  Fd,Fn,Fm,Fa
         000 11111 0 sz 0 m  1  a  n d  1   FMSUB  Fd,Fn,Fm,Fa
         000 11111 0 sz 1 m  0  a  n d  2   FNMADD Fd,Fn,Fm,Fa
         000 11111 0 sz 1 m  1  a  n d  3   FNMSUB Fd,Fn,Fm,Fa
         where Fx=Dx when sz=1, Fx=Sx when sz=0

                  -----SPEC------    ----IMPL----
         fmadd       a +  n * m         a + n * m
         fmsub       a + (-n) * m       a - n * m
         fnmadd    (-a) + (-n) * m    -(a + n * m)
         fnmsub    (-a) +   n * m    -(a - n * m)
      */
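      /* Editorial note on the IMPL column (hedged): the identities are
         exact in real arithmetic but not always bit-exact in IEEE754.
         E.g. with a == +0.0 and n*m == +0.0, fnmsub per SPEC gives
         (-0.0) + 0.0 == +0.0 under round-to-nearest, whereas the IMPL
         form -(a - n*m) == -(+0.0) == -0.0. */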
      Bool    isD   = (ty & 1) == 1;
      UInt    ix    = (bitO1 << 1) | bitO0;
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ity);
      IROp    opSUB = mkSUBF(ity);
      IROp    opMUL = mkMULF(ity);
      IROp    opNEG = mkNEGF(ity);
      IRTemp  res   = newTemp(ity);
      IRExpr* eA    = getQRegLO(aa, ity);
      IRExpr* eN    = getQRegLO(nn, ity);
      IRExpr* eM    = getQRegLO(mm, ity);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
          nameQRegLO(mm, ity), nameQRegLO(aa, ity));
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20   12  9    4
      000 11110 ty 1  imm8 100 imm5 d
      The first 3 bits are really "M 0 S", but M and S are always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt imm8 = INSN(20,13);
   UInt imm5 = INSN(9,5);
   UInt dd   = INSN(4,0);

   /* ------- 00,00000: FMOV s_imm ------- */
   /* ------- 01,00000: FMOV d_imm ------- */
   if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
      Bool  isD = (ty & 1) == 1;
      ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
      if (!isD) {
         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
      }
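      /* Example (editorial, hedged): VFPExpandImm decodes imm8 ==
         abcdefgh as sign a, a small exponent derived from b:c:d, and
         fraction efgh; e.g. imm8 == 0x70 expands to 1.0, that is
         0x3F800000 as F32 or 0x3FF0000000000000 as F64. */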
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
      DIP("fmov %s, #0x%llx\n",
          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15    9 4
      sf  0  0 11110 type 0  rmode opcode scale n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 0) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt sc    = INSN(15,10); // scale
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);
   if (ty <= X01 && rm == X11
       && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      /* -------- (ix) sf ty rm opc -------- */
      /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
      /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
      /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
      /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */

      /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
      /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
      /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
      /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
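      /* Worked example (editorial): FCVTZS Wd, Sn, #4 encodes scale
         field sc == 60, so fbits == 4; the input is multiplied by 2^4
         before truncation, turning e.g. 1.25 into 20. */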
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
             Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
      IRTemp irrm = newTemp(Ity_I32);
      assign(irrm, mkU32(Irrm_ZERO));
      IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
      IRExpr* res = binop(ops[ix], mkexpr(irrm),
                          triop(opMUL, mkexpr(irrm), src, scaleE));
      putIRegOrZR(isI64, dd, res);

      DIP("fcvtz%c %s, %s, #%d\n",
          isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
          nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
      return True;
   }
   /* ------ sf,ty,rm,opc ------ */
   /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits ------ */
   /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits ------ */
   /* (ix) sf S 28    ty   rm opc   15    9 4
      0    0  0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Wn, #fbits
      1    0  0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Wn, #fbits
      2    1  0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Xn, #fbits
      3    1  0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Xn, #fbits

      4    0  0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Wn, #fbits
      5    0  0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Wn, #fbits
      6    1  0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Xn, #fbits
      7    1  0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Xn, #fbits

      These are signed/unsigned conversions from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR,
      scaled per |scale|.
   */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
       && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
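      /* Worked example (editorial): SCVTF Sd, Wn, #4 has fbits == 4,
         so the converted integer is scaled by 2^-4; Wn == 20 becomes
         1.25. */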
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
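      /* Why the unop/binop split below (editorial note): I32 -> F64
         conversions are exact, since every 32-bit integer is exactly
         representable as an F64, so Iop_I32StoF64 and Iop_I32UtoF64
         take no rounding mode; all the other conversions can round. */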
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));

      DIP("%ccvtf %s, %s, #%d\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn), fbits);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);
   /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
   /* 30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)
      ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
      ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01
       && (   ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
           || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
          )
      ) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
         switch (rm) {
            case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
            case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
            case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
            case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
            default: vassert(0);
         }
      } else {
         vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
         switch (rm) {
            case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
            default: vassert(0);
         }
      }
      vassert(irrm != 8);
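      /* Example of irrm's effect (editorial): FCVTMS Wd, Dn applied to
         -1.1 rounds towards -inf and yields -2, whereas FCVTZS of the
         same value truncates towards zero and yields -1. */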
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);

      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf S 28    ty   rm op    15     9 4
      0    0  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversions from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }
   /* -------- FMOV (general) -------- */
   /* case sf S    ty   rm op    15     9 4
       (1)  0  0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2)  1  0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3)  1  0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4)  0  0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5)  1  0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6)  1  0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
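   /* Editorial note: these are raw bit copies with no numeric
      conversion; e.g. FMOV Wd, Sn of 1.0f simply moves the pattern
      0x3F800000 into Wd. */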
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}

static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same_extra(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}

/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo,
        Bool sigill_diag
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
//ZZ    DisResult dres;
//ZZ    //Bool allow_VFP = False;
//ZZ    //UInt hwcaps = archinfo->hwcaps;
//ZZ    IRTemp condT; /* :: Ity_I32 */
//ZZ    HChar dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->jk_StopHere = Ijk_INVALID;
   dres->hint        = Dis_HintNone;
   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));
   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }
   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
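   /* Example of the grouping (editorial): an ADD (immediate) such as
      0x91000421 (add x1, x1, #1) has insn[28:25] == 1000 and so is
      routed to dis_ARM64_data_processing_immediate. */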
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn, sigill_diag);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo, sigill_diag);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn, abiinfo, sigill_diag);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn, sigill_diag);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }
   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}

/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */
DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));
   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo, sigill_diag_IN );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         HChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
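         /* The buffer now reads e.g.
            "1001'0001 0000'0000 0000'0100 0010'0001"
            for insn 0x91000421 (editorial example). */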
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }
      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}

15151 /*--------------------------------------------------------------------*/
15152 /*--- end guest_arm64_toIR.c ---*/
15153 /*--------------------------------------------------------------------*/