qemu/ar7.git: target/arm/translate-a64.c
1 /*
2 * AArch64 translation
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "qemu/log.h"
26 #include "arm_ldst.h"
27 #include "translate.h"
28 #include "internals.h"
29 #include "qemu/host-utils.h"
31 #include "hw/semihosting/semihost.h"
32 #include "exec/gen-icount.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36 #include "exec/log.h"
38 #include "trace-tcg.h"
39 #include "translate-a64.h"
40 #include "qemu/atomic128.h"
42 static TCGv_i64 cpu_X[32];
43 static TCGv_i64 cpu_pc;
45 /* Load/store exclusive handling */
46 static TCGv_i64 cpu_exclusive_high;
48 static const char *regnames[] = {
49 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
50 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
51 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
52 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
55 enum a64_shift_type {
56 A64_SHIFT_TYPE_LSL = 0,
57 A64_SHIFT_TYPE_LSR = 1,
58 A64_SHIFT_TYPE_ASR = 2,
59 A64_SHIFT_TYPE_ROR = 3
62 /* Table based decoder typedefs - used when the relevant bits for decode
63 * are too awkwardly scattered across the instruction (eg SIMD).
65 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
67 typedef struct AArch64DecodeTable {
68 uint32_t pattern;
69 uint32_t mask;
70 AArch64DecodeFn *disas_fn;
71 } AArch64DecodeTable;
73 /* initialize TCG globals. */
74 void a64_translate_init(void)
76 int i;
78 cpu_pc = tcg_global_mem_new_i64(cpu_env,
79 offsetof(CPUARMState, pc),
80 "pc");
81 for (i = 0; i < 32; i++) {
82 cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
83 offsetof(CPUARMState, xregs[i]),
84 regnames[i]);
87 cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
88 offsetof(CPUARMState, exclusive_high), "exclusive_high");
92 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
94 static int get_a64_user_mem_index(DisasContext *s)
97 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
98 * which is the usual mmu_idx for this cpu state.
100 ARMMMUIdx useridx = s->mmu_idx;
102 if (s->unpriv) {
104 * We have pre-computed the condition for AccType_UNPRIV.
105 * Therefore we should never get here with a mmu_idx for
106 * which we do not know the corresponding user mmu_idx.
108 switch (useridx) {
109 case ARMMMUIdx_E10_1:
110 case ARMMMUIdx_E10_1_PAN:
111 useridx = ARMMMUIdx_E10_0;
112 break;
113 case ARMMMUIdx_E20_2:
114 case ARMMMUIdx_E20_2_PAN:
115 useridx = ARMMMUIdx_E20_0;
116 break;
117 case ARMMMUIdx_SE10_1:
118 case ARMMMUIdx_SE10_1_PAN:
119 useridx = ARMMMUIdx_SE10_0;
120 break;
121 default:
122 g_assert_not_reached();
125 return arm_to_core_mmu_idx(useridx);
128 static void reset_btype(DisasContext *s)
130 if (s->btype != 0) {
131 TCGv_i32 zero = tcg_const_i32(0);
132 tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
133 tcg_temp_free_i32(zero);
134 s->btype = 0;
138 static void set_btype(DisasContext *s, int val)
140 TCGv_i32 tcg_val;
142 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
143 tcg_debug_assert(val >= 1 && val <= 3);
145 tcg_val = tcg_const_i32(val);
146 tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
147 tcg_temp_free_i32(tcg_val);
148 s->btype = -1;
151 void gen_a64_set_pc_im(uint64_t val)
153 tcg_gen_movi_i64(cpu_pc, val);
157 * Handle Top Byte Ignore (TBI) bits.
159 * If address tagging is enabled via the TCR TBI bits:
160 * + for EL2 and EL3 there is only one TBI bit, and if it is set
161 * then the address is zero-extended, clearing bits [63:56]
162 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
163 * and TBI1 controls addresses with bit 55 == 1.
164 * If the appropriate TBI bit is set for the address then
165 * the address is sign-extended from bit 55 into bits [63:56]
167 * Here we have concatenated TBI{1,0} into tbi.
169 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
170 TCGv_i64 src, int tbi)
172 if (tbi == 0) {
173 /* Load unmodified address */
174 tcg_gen_mov_i64(dst, src);
175 } else if (!regime_has_2_ranges(s->mmu_idx)) {
176 /* Force tag byte to all zero */
177 tcg_gen_extract_i64(dst, src, 0, 56);
178 } else {
179 /* Sign-extend from bit 55. */
180 tcg_gen_sextract_i64(dst, src, 0, 56);
182 if (tbi != 3) {
183 TCGv_i64 tcg_zero = tcg_const_i64(0);
186 * The two TBI bits differ.
187 * If tbi0, then !tbi1: only use the extension if positive.
188 * If !tbi0, then tbi1: only use the extension if negative.
190 tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
191 dst, dst, tcg_zero, dst, src);
192 tcg_temp_free_i64(tcg_zero);
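/*
 * Worked example (illustrative, not from the original source): with
 * tbi == 3 every address is sign-extended from bit 55, so
 * 0xAB80_0000_0000_0010 (bit 55 set) becomes 0xFF80_0000_0000_0010 and
 * 0xAB00_0000_0000_0010 (bit 55 clear) becomes 0x0000_0000_0000_0010.
 * With tbi == 1 only the bit-55-clear case uses the extension; the
 * movcond above keeps the original address otherwise.
 */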
197 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
200 * If address tagging is enabled for instructions via the TCR TBI bits,
201 * then loading an address into the PC will clear out any tag.
203 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
207 * Return a "clean" address for ADDR according to TBID.
208 * This is always a fresh temporary, as we need to be able to
209 * increment this independently of a dirty write-back address.
211 static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
213 TCGv_i64 clean = new_tmp_a64(s);
215 * In order to get the correct value in the FAR_ELx register,
216 * we must present the memory subsystem with the "dirty" address
217 * including the TBI. In system mode we can make this work via
218 * the TLB, dropping the TBI during translation. But for user-only
219 * mode we don't have that option, and must remove the top byte now.
221 #ifdef CONFIG_USER_ONLY
222 gen_top_byte_ignore(s, clean, addr, s->tbid);
223 #else
224 tcg_gen_mov_i64(clean, addr);
225 #endif
226 return clean;
229 typedef struct DisasCompare64 {
230 TCGCond cond;
231 TCGv_i64 value;
232 } DisasCompare64;
234 static void a64_test_cc(DisasCompare64 *c64, int cc)
236 DisasCompare c32;
238 arm_test_cc(&c32, cc);
240 /* Sign-extend the 32-bit value so that the GE/LT comparisons work
241 * properly. The NE/EQ comparisons are also fine with this choice. */
242 c64->cond = c32.cond;
243 c64->value = tcg_temp_new_i64();
244 tcg_gen_ext_i32_i64(c64->value, c32.value);
246 arm_free_cc(&c32);
249 static void a64_free_cc(DisasCompare64 *c64)
251 tcg_temp_free_i64(c64->value);
254 static void gen_exception_internal(int excp)
256 TCGv_i32 tcg_excp = tcg_const_i32(excp);
258 assert(excp_is_internal(excp));
259 gen_helper_exception_internal(cpu_env, tcg_excp);
260 tcg_temp_free_i32(tcg_excp);
263 static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
265 gen_a64_set_pc_im(pc);
266 gen_exception_internal(excp);
267 s->base.is_jmp = DISAS_NORETURN;
270 static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
271 uint32_t syndrome, uint32_t target_el)
273 gen_a64_set_pc_im(pc);
274 gen_exception(excp, syndrome, target_el);
275 s->base.is_jmp = DISAS_NORETURN;
278 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
280 TCGv_i32 tcg_syn;
282 gen_a64_set_pc_im(s->pc_curr);
283 tcg_syn = tcg_const_i32(syndrome);
284 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
285 tcg_temp_free_i32(tcg_syn);
286 s->base.is_jmp = DISAS_NORETURN;
289 static void gen_step_complete_exception(DisasContext *s)
291 /* We just completed a step of an insn. Move from Active-not-pending
292 * to Active-pending, and then also take the swstep exception.
293 * This corresponds to making the (IMPDEF) choice to prioritize
294 * swstep exceptions over asynchronous exceptions taken to an exception
295 * level where debug is disabled. This choice has the advantage that
296 * we do not need to maintain internal state corresponding to the
297 * ISV/EX syndrome bits between completion of the step and generation
298 * of the exception, and our syndrome information is always correct.
300 gen_ss_advance(s);
301 gen_swstep_exception(s, 1, s->is_ldex);
302 s->base.is_jmp = DISAS_NORETURN;
305 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
307 /* No direct tb linking with singlestep (either QEMU's or the ARM
308 * debug architecture kind) or deterministic io
310 if (s->base.singlestep_enabled || s->ss_active ||
311 (tb_cflags(s->base.tb) & CF_LAST_IO)) {
312 return false;
315 #ifndef CONFIG_USER_ONLY
316 /* Only link tbs from inside the same guest page */
317 if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
318 return false;
320 #endif
322 return true;
325 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
327 TranslationBlock *tb;
329 tb = s->base.tb;
330 if (use_goto_tb(s, n, dest)) {
331 tcg_gen_goto_tb(n);
332 gen_a64_set_pc_im(dest);
333 tcg_gen_exit_tb(tb, n);
334 s->base.is_jmp = DISAS_NORETURN;
335 } else {
336 gen_a64_set_pc_im(dest);
337 if (s->ss_active) {
338 gen_step_complete_exception(s);
339 } else if (s->base.singlestep_enabled) {
340 gen_exception_internal(EXCP_DEBUG);
341 } else {
342 tcg_gen_lookup_and_goto_ptr();
343 s->base.is_jmp = DISAS_NORETURN;
348 void unallocated_encoding(DisasContext *s)
350 /* Unallocated and reserved encodings are uncategorized */
351 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
352 default_exception_el(s));
355 static void init_tmp_a64_array(DisasContext *s)
357 #ifdef CONFIG_DEBUG_TCG
358 memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
359 #endif
360 s->tmp_a64_count = 0;
363 static void free_tmp_a64(DisasContext *s)
365 int i;
366 for (i = 0; i < s->tmp_a64_count; i++) {
367 tcg_temp_free_i64(s->tmp_a64[i]);
369 init_tmp_a64_array(s);
372 TCGv_i64 new_tmp_a64(DisasContext *s)
374 assert(s->tmp_a64_count < TMP_A64_MAX);
375 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
378 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
380 TCGv_i64 t = new_tmp_a64(s);
381 tcg_gen_movi_i64(t, 0);
382 return t;
386 * Register access functions
388 * These functions are used for directly accessing a register in cases where
389 * changes to the final register value are likely to be made. If you
390 * need to use a register for temporary calculation (e.g. index type
391 * operations) use the read_* form.
393 * B1.2.1 Register mappings
395 * In instruction register encoding 31 can refer to ZR (zero register) or
396 * the SP (stack pointer) depending on context. In QEMU's case we map SP
397 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
398 * This is the point of the _sp forms.
400 TCGv_i64 cpu_reg(DisasContext *s, int reg)
402 if (reg == 31) {
403 return new_tmp_a64_zero(s);
404 } else {
405 return cpu_X[reg];
409 /* register access for when 31 == SP */
410 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
412 return cpu_X[reg];
415 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
416 * representing the register contents. This TCGv is an auto-freed
417 * temporary so it need not be explicitly freed, and may be modified.
419 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
421 TCGv_i64 v = new_tmp_a64(s);
422 if (reg != 31) {
423 if (sf) {
424 tcg_gen_mov_i64(v, cpu_X[reg]);
425 } else {
426 tcg_gen_ext32u_i64(v, cpu_X[reg]);
428 } else {
429 tcg_gen_movi_i64(v, 0);
431 return v;
434 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
436 TCGv_i64 v = new_tmp_a64(s);
437 if (sf) {
438 tcg_gen_mov_i64(v, cpu_X[reg]);
439 } else {
440 tcg_gen_ext32u_i64(v, cpu_X[reg]);
442 return v;
445 /* Return the offset into CPUARMState of a slice (from
446 * the least significant end) of FP register Qn (ie
447 * Dn, Sn, Hn or Bn).
448 * (Note that this is not the same mapping as for A32; see cpu.h)
450 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
452 return vec_reg_offset(s, regno, 0, size);
455 /* Offset of the high half of the 128 bit vector Qn */
456 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
458 return vec_reg_offset(s, regno, 1, MO_64);
461 /* Convenience accessors for reading and writing single and double
462 * FP registers. Writing clears the upper parts of the associated
463 * 128 bit vector register, as required by the architecture.
464 * Note that unlike the GP register accessors, the values returned
465 * by the read functions must be manually freed.
467 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
469 TCGv_i64 v = tcg_temp_new_i64();
471 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
472 return v;
475 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
477 TCGv_i32 v = tcg_temp_new_i32();
479 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
480 return v;
483 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
485 TCGv_i32 v = tcg_temp_new_i32();
487 tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
488 return v;
491 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
492 * If SVE is not enabled, then there are only 128 bits in the vector.
494 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
496 unsigned ofs = fp_reg_offset(s, rd, MO_64);
497 unsigned vsz = vec_full_reg_size(s);
499 /* Nop move, with side effect of clearing the tail. */
500 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
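/*
 * Added note: gvec expanders zero any bytes between oprsz and maxsz,
 * so this self-move clears everything above the N-bit vector.
 */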
503 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
505 unsigned ofs = fp_reg_offset(s, reg, MO_64);
507 tcg_gen_st_i64(v, cpu_env, ofs);
508 clear_vec_high(s, false, reg);
511 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
513 TCGv_i64 tmp = tcg_temp_new_i64();
515 tcg_gen_extu_i32_i64(tmp, v);
516 write_fp_dreg(s, reg, tmp);
517 tcg_temp_free_i64(tmp);
520 TCGv_ptr get_fpstatus_ptr(bool is_f16)
522 TCGv_ptr statusptr = tcg_temp_new_ptr();
523 int offset;
525 /* In A64 all instructions (both FP and Neon) use the FPCR; there
526 * is no equivalent of the A32 Neon "standard FPSCR value".
527 * However half-precision operations operate under a different
528 * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
530 if (is_f16) {
531 offset = offsetof(CPUARMState, vfp.fp_status_f16);
532 } else {
533 offset = offsetof(CPUARMState, vfp.fp_status);
535 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
536 return statusptr;
539 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
540 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
541 GVecGen2Fn *gvec_fn, int vece)
543 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
544 is_q ? 16 : 8, vec_full_reg_size(s));
547 /* Expand a 2-operand + immediate AdvSIMD vector operation using
548 * an expander function.
550 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
551 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
553 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
554 imm, is_q ? 16 : 8, vec_full_reg_size(s));
557 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
558 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
559 GVecGen3Fn *gvec_fn, int vece)
561 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
562 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
565 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
566 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
567 int rx, GVecGen4Fn *gvec_fn, int vece)
569 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
570 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
571 is_q ? 16 : 8, vec_full_reg_size(s));
574 /* Expand a 3-operand operation using an out-of-line helper. */
575 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
576 int rn, int rm, int data, gen_helper_gvec_3 *fn)
578 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
579 vec_full_reg_offset(s, rn),
580 vec_full_reg_offset(s, rm),
581 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
584 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
585 * an out-of-line helper.
587 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
588 int rm, bool is_fp16, int data,
589 gen_helper_gvec_3_ptr *fn)
591 TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
592 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
593 vec_full_reg_offset(s, rn),
594 vec_full_reg_offset(s, rm), fpst,
595 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
596 tcg_temp_free_ptr(fpst);
599 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
600 * than the 32 bit equivalent.
602 static inline void gen_set_NZ64(TCGv_i64 result)
604 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
605 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
608 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
609 static inline void gen_logic_CC(int sf, TCGv_i64 result)
611 if (sf) {
612 gen_set_NZ64(result);
613 } else {
614 tcg_gen_extrl_i64_i32(cpu_ZF, result);
615 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
617 tcg_gen_movi_i32(cpu_CF, 0);
618 tcg_gen_movi_i32(cpu_VF, 0);
621 /* dest = T0 + T1; compute C, N, V and Z flags */
622 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
624 if (sf) {
625 TCGv_i64 result, flag, tmp;
626 result = tcg_temp_new_i64();
627 flag = tcg_temp_new_i64();
628 tmp = tcg_temp_new_i64();
630 tcg_gen_movi_i64(tmp, 0);
631 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
633 tcg_gen_extrl_i64_i32(cpu_CF, flag);
635 gen_set_NZ64(result);
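/*
 * Signed overflow occurs when t0 and t1 have the same sign but the
 * result's sign differs, i.e. bit 63 of (result ^ t0) & ~(t0 ^ t1)
 * is set; the extrh below moves that bit into bit 31 of VF.
 */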
637 tcg_gen_xor_i64(flag, result, t0);
638 tcg_gen_xor_i64(tmp, t0, t1);
639 tcg_gen_andc_i64(flag, flag, tmp);
640 tcg_temp_free_i64(tmp);
641 tcg_gen_extrh_i64_i32(cpu_VF, flag);
643 tcg_gen_mov_i64(dest, result);
644 tcg_temp_free_i64(result);
645 tcg_temp_free_i64(flag);
646 } else {
647 /* 32 bit arithmetic */
648 TCGv_i32 t0_32 = tcg_temp_new_i32();
649 TCGv_i32 t1_32 = tcg_temp_new_i32();
650 TCGv_i32 tmp = tcg_temp_new_i32();
652 tcg_gen_movi_i32(tmp, 0);
653 tcg_gen_extrl_i64_i32(t0_32, t0);
654 tcg_gen_extrl_i64_i32(t1_32, t1);
655 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
656 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
657 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
658 tcg_gen_xor_i32(tmp, t0_32, t1_32);
659 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
660 tcg_gen_extu_i32_i64(dest, cpu_NF);
662 tcg_temp_free_i32(tmp);
663 tcg_temp_free_i32(t0_32);
664 tcg_temp_free_i32(t1_32);
668 /* dest = T0 - T1; compute C, N, V and Z flags */
669 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
671 if (sf) {
672 /* 64 bit arithmetic */
673 TCGv_i64 result, flag, tmp;
675 result = tcg_temp_new_i64();
676 flag = tcg_temp_new_i64();
677 tcg_gen_sub_i64(result, t0, t1);
679 gen_set_NZ64(result);
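/* For subtraction, C is "not borrow": it is set when t0 >= t1 (unsigned). */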
681 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
682 tcg_gen_extrl_i64_i32(cpu_CF, flag);
684 tcg_gen_xor_i64(flag, result, t0);
685 tmp = tcg_temp_new_i64();
686 tcg_gen_xor_i64(tmp, t0, t1);
687 tcg_gen_and_i64(flag, flag, tmp);
688 tcg_temp_free_i64(tmp);
689 tcg_gen_extrh_i64_i32(cpu_VF, flag);
690 tcg_gen_mov_i64(dest, result);
691 tcg_temp_free_i64(flag);
692 tcg_temp_free_i64(result);
693 } else {
694 /* 32 bit arithmetic */
695 TCGv_i32 t0_32 = tcg_temp_new_i32();
696 TCGv_i32 t1_32 = tcg_temp_new_i32();
697 TCGv_i32 tmp;
699 tcg_gen_extrl_i64_i32(t0_32, t0);
700 tcg_gen_extrl_i64_i32(t1_32, t1);
701 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
702 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
703 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
704 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
705 tmp = tcg_temp_new_i32();
706 tcg_gen_xor_i32(tmp, t0_32, t1_32);
707 tcg_temp_free_i32(t0_32);
708 tcg_temp_free_i32(t1_32);
709 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
710 tcg_temp_free_i32(tmp);
711 tcg_gen_extu_i32_i64(dest, cpu_NF);
715 /* dest = T0 + T1 + CF; do not compute flags. */
716 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
718 TCGv_i64 flag = tcg_temp_new_i64();
719 tcg_gen_extu_i32_i64(flag, cpu_CF);
720 tcg_gen_add_i64(dest, t0, t1);
721 tcg_gen_add_i64(dest, dest, flag);
722 tcg_temp_free_i64(flag);
724 if (!sf) {
725 tcg_gen_ext32u_i64(dest, dest);
729 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
730 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
732 if (sf) {
733 TCGv_i64 result, cf_64, vf_64, tmp;
734 result = tcg_temp_new_i64();
735 cf_64 = tcg_temp_new_i64();
736 vf_64 = tcg_temp_new_i64();
737 tmp = tcg_const_i64(0);
739 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
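/*
 * Two add2 steps: the first computes t0 + CF and captures its carry,
 * the second adds t1 to that 128-bit intermediate, so cf_64 ends up
 * holding the carry out of the full t0 + t1 + CF.
 */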
740 tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
741 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
742 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
743 gen_set_NZ64(result);
745 tcg_gen_xor_i64(vf_64, result, t0);
746 tcg_gen_xor_i64(tmp, t0, t1);
747 tcg_gen_andc_i64(vf_64, vf_64, tmp);
748 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
750 tcg_gen_mov_i64(dest, result);
752 tcg_temp_free_i64(tmp);
753 tcg_temp_free_i64(vf_64);
754 tcg_temp_free_i64(cf_64);
755 tcg_temp_free_i64(result);
756 } else {
757 TCGv_i32 t0_32, t1_32, tmp;
758 t0_32 = tcg_temp_new_i32();
759 t1_32 = tcg_temp_new_i32();
760 tmp = tcg_const_i32(0);
762 tcg_gen_extrl_i64_i32(t0_32, t0);
763 tcg_gen_extrl_i64_i32(t1_32, t1);
764 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
765 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
767 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
768 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
769 tcg_gen_xor_i32(tmp, t0_32, t1_32);
770 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
771 tcg_gen_extu_i32_i64(dest, cpu_NF);
773 tcg_temp_free_i32(tmp);
774 tcg_temp_free_i32(t1_32);
775 tcg_temp_free_i32(t0_32);
780 * Load/Store generators
784 * Store from GPR register to memory.
786 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
787 TCGv_i64 tcg_addr, int size, int memidx,
788 bool iss_valid,
789 unsigned int iss_srt,
790 bool iss_sf, bool iss_ar)
792 g_assert(size <= 3);
793 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
795 if (iss_valid) {
796 uint32_t syn;
798 syn = syn_data_abort_with_iss(0,
799 size,
800 false,
801 iss_srt,
802 iss_sf,
803 iss_ar,
804 0, 0, 0, 0, 0, false);
805 disas_set_insn_syndrome(s, syn);
809 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
810 TCGv_i64 tcg_addr, int size,
811 bool iss_valid,
812 unsigned int iss_srt,
813 bool iss_sf, bool iss_ar)
815 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
816 iss_valid, iss_srt, iss_sf, iss_ar);
820 * Load from memory to GPR register
822 static void do_gpr_ld_memidx(DisasContext *s,
823 TCGv_i64 dest, TCGv_i64 tcg_addr,
824 int size, bool is_signed,
825 bool extend, int memidx,
826 bool iss_valid, unsigned int iss_srt,
827 bool iss_sf, bool iss_ar)
829 MemOp memop = s->be_data + size;
831 g_assert(size <= 3);
833 if (is_signed) {
834 memop += MO_SIGN;
837 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
839 if (extend && is_signed) {
840 g_assert(size < 3);
841 tcg_gen_ext32u_i64(dest, dest);
844 if (iss_valid) {
845 uint32_t syn;
847 syn = syn_data_abort_with_iss(0,
848 size,
849 is_signed,
850 iss_srt,
851 iss_sf,
852 iss_ar,
853 0, 0, 0, 0, 0, false);
854 disas_set_insn_syndrome(s, syn);
858 static void do_gpr_ld(DisasContext *s,
859 TCGv_i64 dest, TCGv_i64 tcg_addr,
860 int size, bool is_signed, bool extend,
861 bool iss_valid, unsigned int iss_srt,
862 bool iss_sf, bool iss_ar)
864 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
865 get_mem_index(s),
866 iss_valid, iss_srt, iss_sf, iss_ar);
870 * Store from FP register to memory
872 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
874 /* This writes the bottom N bits of a 128 bit wide vector to memory */
875 TCGv_i64 tmp = tcg_temp_new_i64();
876 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
877 if (size < 4) {
878 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
879 s->be_data + size);
880 } else {
881 bool be = s->be_data == MO_BE;
882 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
884 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
885 tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
886 s->be_data | MO_Q);
887 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
888 tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
889 s->be_data | MO_Q);
890 tcg_temp_free_i64(tcg_hiaddr);
893 tcg_temp_free_i64(tmp);
897 * Load from memory to FP register
899 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
901 /* This always zero-extends and writes to a full 128 bit wide vector */
902 TCGv_i64 tmplo = tcg_temp_new_i64();
903 TCGv_i64 tmphi = NULL;
905 if (size < 4) {
906 MemOp memop = s->be_data + size;
907 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
908 } else {
909 bool be = s->be_data == MO_BE;
910 TCGv_i64 tcg_hiaddr;
912 tmphi = tcg_temp_new_i64();
913 tcg_hiaddr = tcg_temp_new_i64();
915 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
916 tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
917 s->be_data | MO_Q);
918 tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
919 s->be_data | MO_Q);
920 tcg_temp_free_i64(tcg_hiaddr);
923 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
924 tcg_temp_free_i64(tmplo);
926 if (tmphi) {
927 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
928 tcg_temp_free_i64(tmphi);
930 clear_vec_high(s, tmphi != NULL, destidx);
934 * Vector load/store helpers.
936 * The principal difference between this and a FP load is that we don't
937 * zero extend as we are filling a partial chunk of the vector register.
938 * These functions don't support 128 bit loads/stores, which would be
939 * normal load/store operations.
941 * The _i32 versions are useful when operating on 32 bit quantities
942 * (eg for floating point single or using Neon helper functions).
945 /* Get value of an element within a vector register */
946 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
947 int element, MemOp memop)
949 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
950 switch (memop) {
951 case MO_8:
952 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
953 break;
954 case MO_16:
955 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
956 break;
957 case MO_32:
958 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
959 break;
960 case MO_8|MO_SIGN:
961 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
962 break;
963 case MO_16|MO_SIGN:
964 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
965 break;
966 case MO_32|MO_SIGN:
967 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
968 break;
969 case MO_64:
970 case MO_64|MO_SIGN:
971 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
972 break;
973 default:
974 g_assert_not_reached();
978 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
979 int element, MemOp memop)
981 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
982 switch (memop) {
983 case MO_8:
984 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
985 break;
986 case MO_16:
987 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
988 break;
989 case MO_8|MO_SIGN:
990 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
991 break;
992 case MO_16|MO_SIGN:
993 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
994 break;
995 case MO_32:
996 case MO_32|MO_SIGN:
997 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
998 break;
999 default:
1000 g_assert_not_reached();
1004 /* Set value of an element within a vector register */
1005 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1006 int element, MemOp memop)
1008 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1009 switch (memop) {
1010 case MO_8:
1011 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1012 break;
1013 case MO_16:
1014 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1015 break;
1016 case MO_32:
1017 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1018 break;
1019 case MO_64:
1020 tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1021 break;
1022 default:
1023 g_assert_not_reached();
1027 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1028 int destidx, int element, MemOp memop)
1030 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1031 switch (memop) {
1032 case MO_8:
1033 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1034 break;
1035 case MO_16:
1036 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1037 break;
1038 case MO_32:
1039 tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1040 break;
1041 default:
1042 g_assert_not_reached();
1046 /* Store from vector register to memory */
1047 static void do_vec_st(DisasContext *s, int srcidx, int element,
1048 TCGv_i64 tcg_addr, int size, MemOp endian)
1050 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1052 read_vec_element(s, tcg_tmp, srcidx, element, size);
1053 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1055 tcg_temp_free_i64(tcg_tmp);
1058 /* Load from memory to vector register */
1059 static void do_vec_ld(DisasContext *s, int destidx, int element,
1060 TCGv_i64 tcg_addr, int size, MemOp endian)
1062 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1064 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1065 write_vec_element(s, tcg_tmp, destidx, element, size);
1067 tcg_temp_free_i64(tcg_tmp);
1070 /* Check that FP/Neon access is enabled. If it is, return
1071 * true. If not, emit code to generate an appropriate exception,
1072 * and return false; the caller should not emit any code for
1073 * the instruction. Note that this check must happen after all
1074 * unallocated-encoding checks (otherwise the syndrome information
1075 * for the resulting exception will be incorrect).
1077 static inline bool fp_access_check(DisasContext *s)
1079 assert(!s->fp_access_checked);
1080 s->fp_access_checked = true;
1082 if (!s->fp_excp_el) {
1083 return true;
1086 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1087 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
1088 return false;
1091 /* Check that SVE access is enabled. If it is, return true.
1092 * If not, emit code to generate an appropriate exception and return false.
1094 bool sve_access_check(DisasContext *s)
1096 if (s->sve_excp_el) {
1097 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
1098 s->sve_excp_el);
1099 return false;
1101 return fp_access_check(s);
1105 * This utility function is for doing register extension with an
1106 * optional shift. You will likely want to pass a temporary for the
1107 * destination register. See DecodeRegExtend() in the ARM ARM.
1109 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1110 int option, unsigned int shift)
1112 int extsize = extract32(option, 0, 2);
1113 bool is_signed = extract32(option, 2, 1);
1115 if (is_signed) {
1116 switch (extsize) {
1117 case 0:
1118 tcg_gen_ext8s_i64(tcg_out, tcg_in);
1119 break;
1120 case 1:
1121 tcg_gen_ext16s_i64(tcg_out, tcg_in);
1122 break;
1123 case 2:
1124 tcg_gen_ext32s_i64(tcg_out, tcg_in);
1125 break;
1126 case 3:
1127 tcg_gen_mov_i64(tcg_out, tcg_in);
1128 break;
1130 } else {
1131 switch (extsize) {
1132 case 0:
1133 tcg_gen_ext8u_i64(tcg_out, tcg_in);
1134 break;
1135 case 1:
1136 tcg_gen_ext16u_i64(tcg_out, tcg_in);
1137 break;
1138 case 2:
1139 tcg_gen_ext32u_i64(tcg_out, tcg_in);
1140 break;
1141 case 3:
1142 tcg_gen_mov_i64(tcg_out, tcg_in);
1143 break;
1147 if (shift) {
1148 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
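/*
 * Added note: the 3-bit option field encodes the extend type
 * (0=UXTB, 1=UXTH, 2=UXTW, 3=UXTX/LSL, 4=SXTB, 5=SXTH, 6=SXTW, 7=SXTX),
 * matching DecodeRegExtend() in the ARM ARM; e.g. option == 6 with
 * shift == 2 corresponds to "SXTW #2".
 */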
1152 static inline void gen_check_sp_alignment(DisasContext *s)
1154 /* The AArch64 architecture mandates that (if enabled via PSTATE
1155 * or SCTLR bits) there is a check that SP is 16-aligned on every
1156 * SP-relative load or store (with an exception generated if it is not).
1157 * In line with general QEMU practice regarding misaligned accesses,
1158 * we omit these checks for the sake of guest program performance.
1159 * This function is provided as a hook so we can more easily add these
1160 * checks in future (possibly as a "favour catching guest program bugs
1161 * over speed" user selectable option).
1166 * This provides a simple table-based lookup decoder. It is
1167 * intended to be used when the relevant bits for decode are too
1168 * awkwardly placed and switch/if based logic would be confusing and
1169 * deeply nested. Since it's a linear search through the table, tables
1170 * should be kept small.
1172 * It returns the first handler where insn & mask == pattern, or
1173 * NULL if there is no match.
1174 * The table is terminated by an empty mask (i.e. 0)
1176 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1177 uint32_t insn)
1179 const AArch64DecodeTable *tptr = table;
1181 while (tptr->mask) {
1182 if ((insn & tptr->mask) == tptr->pattern) {
1183 return tptr->disas_fn;
1185 tptr++;
1187 return NULL;
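/*
 * Illustrative usage sketch (not part of the original file; the table
 * contents and the disas_example_* handler names are hypothetical):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200c00, disas_example_three_reg },
 *         { 0x0e300800, 0x9f3e0c00, disas_example_two_reg_misc },
 *         { 0x00000000, 0x00000000, NULL }   // terminator: empty mask
 *     };
 *
 *     static void disas_example_group(DisasContext *s, uint32_t insn)
 *     {
 *         AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
 *         if (fn) {
 *             fn(s, insn);
 *         } else {
 *             unallocated_encoding(s);
 *         }
 *     }
 */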
1191 * The instruction disassembly implemented here matches
1192 * the instruction encoding classifications in chapter C4
1193 * of the ARM Architecture Reference Manual (DDI0487B_a);
1194 * classification names and decode diagrams here should generally
1195 * match up with those in the manual.
1198 /* Unconditional branch (immediate)
1199 * 31 30 26 25 0
1200 * +----+-----------+-------------------------------------+
1201 * | op | 0 0 1 0 1 | imm26 |
1202 * +----+-----------+-------------------------------------+
1204 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1206 uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;
1208 if (insn & (1U << 31)) {
1209 /* BL Branch with link */
1210 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
1213 /* B Branch / BL Branch with link */
1214 reset_btype(s);
1215 gen_goto_tb(s, 0, addr);
1218 /* Compare and branch (immediate)
1219 * 31 30 25 24 23 5 4 0
1220 * +----+-------------+----+---------------------+--------+
1221 * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
1222 * +----+-------------+----+---------------------+--------+
1224 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1226 unsigned int sf, op, rt;
1227 uint64_t addr;
1228 TCGLabel *label_match;
1229 TCGv_i64 tcg_cmp;
1231 sf = extract32(insn, 31, 1);
1232 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1233 rt = extract32(insn, 0, 5);
1234 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1236 tcg_cmp = read_cpu_reg(s, rt, sf);
1237 label_match = gen_new_label();
1239 reset_btype(s);
1240 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1241 tcg_cmp, 0, label_match);
1243 gen_goto_tb(s, 0, s->base.pc_next);
1244 gen_set_label(label_match);
1245 gen_goto_tb(s, 1, addr);
1248 /* Test and branch (immediate)
1249 * 31 30 25 24 23 19 18 5 4 0
1250 * +----+-------------+----+-------+-------------+------+
1251 * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
1252 * +----+-------------+----+-------+-------------+------+
1254 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1256 unsigned int bit_pos, op, rt;
1257 uint64_t addr;
1258 TCGLabel *label_match;
1259 TCGv_i64 tcg_cmp;
1261 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1262 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1263 addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
1264 rt = extract32(insn, 0, 5);
1266 tcg_cmp = tcg_temp_new_i64();
1267 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1268 label_match = gen_new_label();
1270 reset_btype(s);
1271 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1272 tcg_cmp, 0, label_match);
1273 tcg_temp_free_i64(tcg_cmp);
1274 gen_goto_tb(s, 0, s->base.pc_next);
1275 gen_set_label(label_match);
1276 gen_goto_tb(s, 1, addr);
1279 /* Conditional branch (immediate)
1280 * 31 25 24 23 5 4 3 0
1281 * +---------------+----+---------------------+----+------+
1282 * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
1283 * +---------------+----+---------------------+----+------+
1285 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1287 unsigned int cond;
1288 uint64_t addr;
1290 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1291 unallocated_encoding(s);
1292 return;
1294 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1295 cond = extract32(insn, 0, 4);
1297 reset_btype(s);
1298 if (cond < 0x0e) {
1299 /* genuinely conditional branches */
1300 TCGLabel *label_match = gen_new_label();
1301 arm_gen_test_cc(cond, label_match);
1302 gen_goto_tb(s, 0, s->base.pc_next);
1303 gen_set_label(label_match);
1304 gen_goto_tb(s, 1, addr);
1305 } else {
1306 /* 0xe and 0xf are both "always" conditions */
1307 gen_goto_tb(s, 0, addr);
1311 /* HINT instruction group, including various allocated HINTs */
1312 static void handle_hint(DisasContext *s, uint32_t insn,
1313 unsigned int op1, unsigned int op2, unsigned int crm)
1315 unsigned int selector = crm << 3 | op2;
1317 if (op1 != 3) {
1318 unallocated_encoding(s);
1319 return;
1322 switch (selector) {
1323 case 0b00000: /* NOP */
1324 break;
1325 case 0b00011: /* WFI */
1326 s->base.is_jmp = DISAS_WFI;
1327 break;
1328 case 0b00001: /* YIELD */
1329 /* When running in MTTCG we don't generate jumps to the yield and
1330 * WFE helpers as it won't affect the scheduling of other vCPUs.
1331 * If we wanted to more completely model WFE/SEV so we don't busy
1332 * spin unnecessarily we would need to do something more involved.
1334 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1335 s->base.is_jmp = DISAS_YIELD;
1337 break;
1338 case 0b00010: /* WFE */
1339 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1340 s->base.is_jmp = DISAS_WFE;
1342 break;
1343 case 0b00100: /* SEV */
1344 case 0b00101: /* SEVL */
1345 /* we treat all as NOP at least for now */
1346 break;
1347 case 0b00111: /* XPACLRI */
1348 if (s->pauth_active) {
1349 gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1351 break;
1352 case 0b01000: /* PACIA1716 */
1353 if (s->pauth_active) {
1354 gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1356 break;
1357 case 0b01010: /* PACIB1716 */
1358 if (s->pauth_active) {
1359 gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1361 break;
1362 case 0b01100: /* AUTIA1716 */
1363 if (s->pauth_active) {
1364 gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1366 break;
1367 case 0b01110: /* AUTIB1716 */
1368 if (s->pauth_active) {
1369 gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1371 break;
1372 case 0b11000: /* PACIAZ */
1373 if (s->pauth_active) {
1374 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1375 new_tmp_a64_zero(s));
1377 break;
1378 case 0b11001: /* PACIASP */
1379 if (s->pauth_active) {
1380 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1382 break;
1383 case 0b11010: /* PACIBZ */
1384 if (s->pauth_active) {
1385 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1386 new_tmp_a64_zero(s));
1388 break;
1389 case 0b11011: /* PACIBSP */
1390 if (s->pauth_active) {
1391 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1393 break;
1394 case 0b11100: /* AUTIAZ */
1395 if (s->pauth_active) {
1396 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1397 new_tmp_a64_zero(s));
1399 break;
1400 case 0b11101: /* AUTIASP */
1401 if (s->pauth_active) {
1402 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1404 break;
1405 case 0b11110: /* AUTIBZ */
1406 if (s->pauth_active) {
1407 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1408 new_tmp_a64_zero(s));
1410 break;
1411 case 0b11111: /* AUTIBSP */
1412 if (s->pauth_active) {
1413 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1415 break;
1416 default:
1417 /* default specified as NOP equivalent */
1418 break;
1422 static void gen_clrex(DisasContext *s, uint32_t insn)
1424 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1427 /* CLREX, DSB, DMB, ISB */
1428 static void handle_sync(DisasContext *s, uint32_t insn,
1429 unsigned int op1, unsigned int op2, unsigned int crm)
1431 TCGBar bar;
1433 if (op1 != 3) {
1434 unallocated_encoding(s);
1435 return;
1438 switch (op2) {
1439 case 2: /* CLREX */
1440 gen_clrex(s, insn);
1441 return;
1442 case 4: /* DSB */
1443 case 5: /* DMB */
1444 switch (crm & 3) {
1445 case 1: /* MBReqTypes_Reads */
1446 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1447 break;
1448 case 2: /* MBReqTypes_Writes */
1449 bar = TCG_BAR_SC | TCG_MO_ST_ST;
1450 break;
1451 default: /* MBReqTypes_All */
1452 bar = TCG_BAR_SC | TCG_MO_ALL;
1453 break;
1455 tcg_gen_mb(bar);
1456 return;
1457 case 6: /* ISB */
1458 /* We need to break the TB after this insn to execute
1459 * self-modifying code correctly and also to take
1460 * any pending interrupts immediately.
1462 reset_btype(s);
1463 gen_goto_tb(s, 0, s->base.pc_next);
1464 return;
1466 case 7: /* SB */
1467 if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1468 goto do_unallocated;
1471 * TODO: There is no speculation barrier opcode for TCG;
1472 * MB and end the TB instead.
1474 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1475 gen_goto_tb(s, 0, s->base.pc_next);
1476 return;
1478 default:
1479 do_unallocated:
1480 unallocated_encoding(s);
1481 return;
1485 static void gen_xaflag(void)
1487 TCGv_i32 z = tcg_temp_new_i32();
1489 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1492 * (!C & !Z) << 31
1493 * (!(C | Z)) << 31
1494 * ~((C | Z) << 31)
1495 * ~-(C | Z)
1496 * (C | Z) - 1
1498 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1499 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1501 /* !(Z & C) */
1502 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1503 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1505 /* (!C & Z) << 31 -> -(Z & ~C) */
1506 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1507 tcg_gen_neg_i32(cpu_VF, cpu_VF);
1509 /* C | Z */
1510 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1512 tcg_temp_free_i32(z);
1515 static void gen_axflag(void)
1517 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
1518 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
1520 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1521 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1523 tcg_gen_movi_i32(cpu_NF, 0);
1524 tcg_gen_movi_i32(cpu_VF, 0);
1527 /* MSR (immediate) - move immediate to processor state field */
1528 static void handle_msr_i(DisasContext *s, uint32_t insn,
1529 unsigned int op1, unsigned int op2, unsigned int crm)
1531 TCGv_i32 t1;
1532 int op = op1 << 3 | op2;
1534 /* End the TB by default, chaining is ok. */
1535 s->base.is_jmp = DISAS_TOO_MANY;
1537 switch (op) {
1538 case 0x00: /* CFINV */
1539 if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1540 goto do_unallocated;
1542 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1543 s->base.is_jmp = DISAS_NEXT;
1544 break;
1546 case 0x01: /* XAFlag */
1547 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1548 goto do_unallocated;
1550 gen_xaflag();
1551 s->base.is_jmp = DISAS_NEXT;
1552 break;
1554 case 0x02: /* AXFlag */
1555 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1556 goto do_unallocated;
1558 gen_axflag();
1559 s->base.is_jmp = DISAS_NEXT;
1560 break;
1562 case 0x03: /* UAO */
1563 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1564 goto do_unallocated;
1566 if (crm & 1) {
1567 set_pstate_bits(PSTATE_UAO);
1568 } else {
1569 clear_pstate_bits(PSTATE_UAO);
1571 t1 = tcg_const_i32(s->current_el);
1572 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1573 tcg_temp_free_i32(t1);
1574 break;
1576 case 0x04: /* PAN */
1577 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1578 goto do_unallocated;
1580 if (crm & 1) {
1581 set_pstate_bits(PSTATE_PAN);
1582 } else {
1583 clear_pstate_bits(PSTATE_PAN);
1585 t1 = tcg_const_i32(s->current_el);
1586 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1587 tcg_temp_free_i32(t1);
1588 break;
1590 case 0x05: /* SPSel */
1591 if (s->current_el == 0) {
1592 goto do_unallocated;
1594 t1 = tcg_const_i32(crm & PSTATE_SP);
1595 gen_helper_msr_i_spsel(cpu_env, t1);
1596 tcg_temp_free_i32(t1);
1597 break;
1599 case 0x1e: /* DAIFSet */
1600 t1 = tcg_const_i32(crm);
1601 gen_helper_msr_i_daifset(cpu_env, t1);
1602 tcg_temp_free_i32(t1);
1603 break;
1605 case 0x1f: /* DAIFClear */
1606 t1 = tcg_const_i32(crm);
1607 gen_helper_msr_i_daifclear(cpu_env, t1);
1608 tcg_temp_free_i32(t1);
1609 /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
1610 s->base.is_jmp = DISAS_UPDATE;
1611 break;
1613 default:
1614 do_unallocated:
1615 unallocated_encoding(s);
1616 return;
1620 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1622 TCGv_i32 tmp = tcg_temp_new_i32();
1623 TCGv_i32 nzcv = tcg_temp_new_i32();
1625 /* build bit 31, N */
1626 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1627 /* build bit 30, Z */
1628 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1629 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1630 /* build bit 29, C */
1631 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1632 /* build bit 28, V */
1633 tcg_gen_shri_i32(tmp, cpu_VF, 31);
1634 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1635 /* generate result */
1636 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1638 tcg_temp_free_i32(nzcv);
1639 tcg_temp_free_i32(tmp);
1642 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1644 TCGv_i32 nzcv = tcg_temp_new_i32();
1646 /* take NZCV from R[t] */
1647 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1649 /* bit 31, N */
1650 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1651 /* bit 30, Z */
1652 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1653 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1654 /* bit 29, C */
1655 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1656 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1657 /* bit 28, V */
1658 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1659 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1660 tcg_temp_free_i32(nzcv);
1663 /* MRS - move from system register
1664 * MSR (register) - move to system register
1665 * SYS
1666 * SYSL
1667 * These are all essentially the same insn in 'read' and 'write'
1668 * versions, with varying op0 fields.
1670 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1671 unsigned int op0, unsigned int op1, unsigned int op2,
1672 unsigned int crn, unsigned int crm, unsigned int rt)
1674 const ARMCPRegInfo *ri;
1675 TCGv_i64 tcg_rt;
1677 ri = get_arm_cp_reginfo(s->cp_regs,
1678 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1679 crn, crm, op0, op1, op2));
1681 if (!ri) {
1682 /* Unknown register; this might be a guest error or a QEMU
1683 * unimplemented feature.
1685 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1686 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1687 isread ? "read" : "write", op0, op1, crn, crm, op2);
1688 unallocated_encoding(s);
1689 return;
1692 /* Check access permissions */
1693 if (!cp_access_ok(s->current_el, ri, isread)) {
1694 unallocated_encoding(s);
1695 return;
1698 if (ri->accessfn) {
1699 /* Emit code to perform further access permissions checks at
1700 * runtime; this may result in an exception.
1702 TCGv_ptr tmpptr;
1703 TCGv_i32 tcg_syn, tcg_isread;
1704 uint32_t syndrome;
1706 gen_a64_set_pc_im(s->pc_curr);
1707 tmpptr = tcg_const_ptr(ri);
1708 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1709 tcg_syn = tcg_const_i32(syndrome);
1710 tcg_isread = tcg_const_i32(isread);
1711 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1712 tcg_temp_free_ptr(tmpptr);
1713 tcg_temp_free_i32(tcg_syn);
1714 tcg_temp_free_i32(tcg_isread);
1715 } else if (ri->type & ARM_CP_RAISES_EXC) {
1717 * The readfn or writefn might raise an exception;
1718 * synchronize the CPU state in case it does.
1720 gen_a64_set_pc_im(s->pc_curr);
1723 /* Handle special cases first */
1724 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1725 case ARM_CP_NOP:
1726 return;
1727 case ARM_CP_NZCV:
1728 tcg_rt = cpu_reg(s, rt);
1729 if (isread) {
1730 gen_get_nzcv(tcg_rt);
1731 } else {
1732 gen_set_nzcv(tcg_rt);
1734 return;
1735 case ARM_CP_CURRENTEL:
1736 /* Reads as current EL value from pstate, which is
1737 * guaranteed to be constant by the tb flags.
1739 tcg_rt = cpu_reg(s, rt);
1740 tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1741 return;
1742 case ARM_CP_DC_ZVA:
1743 /* Writes clear the aligned block of memory which rt points into. */
1744 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
1745 gen_helper_dc_zva(cpu_env, tcg_rt);
1746 return;
1747 default:
1748 break;
1750 if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1751 return;
1752 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1753 return;
1756 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1757 gen_io_start();
1760 tcg_rt = cpu_reg(s, rt);
1762 if (isread) {
1763 if (ri->type & ARM_CP_CONST) {
1764 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1765 } else if (ri->readfn) {
1766 TCGv_ptr tmpptr;
1767 tmpptr = tcg_const_ptr(ri);
1768 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1769 tcg_temp_free_ptr(tmpptr);
1770 } else {
1771 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1773 } else {
1774 if (ri->type & ARM_CP_CONST) {
1775 /* If not forbidden by access permissions, treat as WI */
1776 return;
1777 } else if (ri->writefn) {
1778 TCGv_ptr tmpptr;
1779 tmpptr = tcg_const_ptr(ri);
1780 gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1781 tcg_temp_free_ptr(tmpptr);
1782 } else {
1783 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1787 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1788 /* I/O operations must end the TB here (whether read or write) */
1789 s->base.is_jmp = DISAS_UPDATE;
1791 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1793 * A write to any coprocessor register that ends a TB
1794 * must rebuild the hflags for the next TB.
1796 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
1797 gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
1798 tcg_temp_free_i32(tcg_el);
1800 * We default to ending the TB on a coprocessor register write,
1801 * but allow this to be suppressed by the register definition
1802 * (usually only necessary to work around guest bugs).
1804 s->base.is_jmp = DISAS_UPDATE;
1808 /* System
1809 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1810 * +---------------------+---+-----+-----+-------+-------+-----+------+
1811 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1812 * +---------------------+---+-----+-----+-------+-------+-----+------+
1814 static void disas_system(DisasContext *s, uint32_t insn)
1816 unsigned int l, op0, op1, crn, crm, op2, rt;
1817 l = extract32(insn, 21, 1);
1818 op0 = extract32(insn, 19, 2);
1819 op1 = extract32(insn, 16, 3);
1820 crn = extract32(insn, 12, 4);
1821 crm = extract32(insn, 8, 4);
1822 op2 = extract32(insn, 5, 3);
1823 rt = extract32(insn, 0, 5);
1825 if (op0 == 0) {
1826 if (l || rt != 31) {
1827 unallocated_encoding(s);
1828 return;
1830 switch (crn) {
1831 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1832 handle_hint(s, insn, op1, op2, crm);
1833 break;
1834 case 3: /* CLREX, DSB, DMB, ISB */
1835 handle_sync(s, insn, op1, op2, crm);
1836 break;
1837 case 4: /* MSR (immediate) */
1838 handle_msr_i(s, insn, op1, op2, crm);
1839 break;
1840 default:
1841 unallocated_encoding(s);
1842 break;
1844 return;
1846 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1849 /* Exception generation
1851 * 31 24 23 21 20 5 4 2 1 0
1852 * +-----------------+-----+------------------------+-----+----+
1853 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
1854 * +-----------------------+------------------------+----------+
1856 static void disas_exc(DisasContext *s, uint32_t insn)
1858 int opc = extract32(insn, 21, 3);
1859 int op2_ll = extract32(insn, 0, 5);
1860 int imm16 = extract32(insn, 5, 16);
1861 TCGv_i32 tmp;
1863 switch (opc) {
1864 case 0:
1865 /* For SVC, HVC and SMC we advance the single-step state
1866 * machine before taking the exception. This is architecturally
1867 * mandated, to ensure that single-stepping a system call
1868 * instruction works properly.
1870 switch (op2_ll) {
1871 case 1: /* SVC */
1872 gen_ss_advance(s);
1873 gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
1874 syn_aa64_svc(imm16), default_exception_el(s));
1875 break;
1876 case 2: /* HVC */
1877 if (s->current_el == 0) {
1878 unallocated_encoding(s);
1879 break;
1881 /* The pre HVC helper handles cases when HVC gets trapped
1882 * as an undefined insn by runtime configuration.
1884 gen_a64_set_pc_im(s->pc_curr);
1885 gen_helper_pre_hvc(cpu_env);
1886 gen_ss_advance(s);
1887 gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
1888 syn_aa64_hvc(imm16), 2);
1889 break;
1890 case 3: /* SMC */
1891 if (s->current_el == 0) {
1892 unallocated_encoding(s);
1893 break;
1895 gen_a64_set_pc_im(s->pc_curr);
1896 tmp = tcg_const_i32(syn_aa64_smc(imm16));
1897 gen_helper_pre_smc(cpu_env, tmp);
1898 tcg_temp_free_i32(tmp);
1899 gen_ss_advance(s);
1900 gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
1901 syn_aa64_smc(imm16), 3);
1902 break;
1903 default:
1904 unallocated_encoding(s);
1905 break;
1907 break;
1908 case 1:
1909 if (op2_ll != 0) {
1910 unallocated_encoding(s);
1911 break;
1913 /* BRK */
1914 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
1915 break;
1916 case 2:
1917 if (op2_ll != 0) {
1918 unallocated_encoding(s);
1919 break;
1921 /* HLT. This has two purposes.
1922 * Architecturally, it is an external halting debug instruction.
1923 * Since QEMU doesn't implement external debug, we treat this as
1924 * required when halting debug is disabled: it will UNDEF.
1925 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1927 if (semihosting_enabled() && imm16 == 0xf000) {
1928 #ifndef CONFIG_USER_ONLY
1929 /* In system mode, don't allow userspace access to semihosting,
1930 * to provide some semblance of security (and for consistency
1931 * with our 32-bit semihosting).
1933 if (s->current_el == 0) {
1934 unsupported_encoding(s, insn);
1935 break;
1937 #endif
1938 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1939 } else {
1940 unsupported_encoding(s, insn);
1942 break;
1943 case 5:
1944 if (op2_ll < 1 || op2_ll > 3) {
1945 unallocated_encoding(s);
1946 break;
1948 /* DCPS1, DCPS2, DCPS3 */
1949 unsupported_encoding(s, insn);
1950 break;
1951 default:
1952 unallocated_encoding(s);
1953 break;
1957 /* Unconditional branch (register)
1958 * 31 25 24 21 20 16 15 10 9 5 4 0
1959 * +---------------+-------+-------+-------+------+-------+
1960 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1961 * +---------------+-------+-------+-------+------+-------+
1963 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1965 unsigned int opc, op2, op3, rn, op4;
1966 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */
1967 TCGv_i64 dst;
1968 TCGv_i64 modifier;
1970 opc = extract32(insn, 21, 4);
1971 op2 = extract32(insn, 16, 5);
1972 op3 = extract32(insn, 10, 6);
1973 rn = extract32(insn, 5, 5);
1974 op4 = extract32(insn, 0, 5);
1976 if (op2 != 0x1f) {
1977 goto do_unallocated;
1980 switch (opc) {
1981 case 0: /* BR */
1982 case 1: /* BLR */
1983 case 2: /* RET */
1984 btype_mod = opc;
1985 switch (op3) {
1986 case 0:
1987 /* BR, BLR, RET */
1988 if (op4 != 0) {
1989 goto do_unallocated;
1991 dst = cpu_reg(s, rn);
1992 break;
1994 case 2:
1995 case 3:
1996 if (!dc_isar_feature(aa64_pauth, s)) {
1997 goto do_unallocated;
1999 if (opc == 2) {
2000 /* RETAA, RETAB */
2001 if (rn != 0x1f || op4 != 0x1f) {
2002 goto do_unallocated;
2004 rn = 30;
2005 modifier = cpu_X[31];
2006 } else {
2007 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2008 if (op4 != 0x1f) {
2009 goto do_unallocated;
2011 modifier = new_tmp_a64_zero(s);
2013 if (s->pauth_active) {
2014 dst = new_tmp_a64(s);
2015 if (op3 == 2) {
2016 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2017 } else {
2018 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2020 } else {
2021 dst = cpu_reg(s, rn);
2023 break;
2025 default:
2026 goto do_unallocated;
2028 gen_a64_set_pc(s, dst);
2029 /* BLR also needs to load return address */
2030 if (opc == 1) {
2031 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2033 break;
2035 case 8: /* BRAA */
2036 case 9: /* BLRAA */
2037 if (!dc_isar_feature(aa64_pauth, s)) {
2038 goto do_unallocated;
2040 if ((op3 & ~1) != 2) {
2041 goto do_unallocated;
2043 btype_mod = opc & 1;
2044 if (s->pauth_active) {
2045 dst = new_tmp_a64(s);
2046 modifier = cpu_reg_sp(s, op4);
2047 if (op3 == 2) {
2048 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2049 } else {
2050 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2052 } else {
2053 dst = cpu_reg(s, rn);
2055 gen_a64_set_pc(s, dst);
2056 /* BLRAA also needs to load return address */
2057 if (opc == 9) {
2058 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2060 break;
2062 case 4: /* ERET */
2063 if (s->current_el == 0) {
2064 goto do_unallocated;
2066 switch (op3) {
2067 case 0: /* ERET */
2068 if (op4 != 0) {
2069 goto do_unallocated;
2071 dst = tcg_temp_new_i64();
2072 tcg_gen_ld_i64(dst, cpu_env,
2073 offsetof(CPUARMState, elr_el[s->current_el]));
2074 break;
2076 case 2: /* ERETAA */
2077 case 3: /* ERETAB */
2078 if (!dc_isar_feature(aa64_pauth, s)) {
2079 goto do_unallocated;
2081 if (rn != 0x1f || op4 != 0x1f) {
2082 goto do_unallocated;
2084 dst = tcg_temp_new_i64();
2085 tcg_gen_ld_i64(dst, cpu_env,
2086 offsetof(CPUARMState, elr_el[s->current_el]));
2087 if (s->pauth_active) {
2088 modifier = cpu_X[31];
2089 if (op3 == 2) {
2090 gen_helper_autia(dst, cpu_env, dst, modifier);
2091 } else {
2092 gen_helper_autib(dst, cpu_env, dst, modifier);
2095 break;
2097 default:
2098 goto do_unallocated;
2100 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2101 gen_io_start();
2104 gen_helper_exception_return(cpu_env, dst);
2105 tcg_temp_free_i64(dst);
2106 /* Must exit loop to check un-masked IRQs */
2107 s->base.is_jmp = DISAS_EXIT;
2108 return;
2110 case 5: /* DRPS */
2111 if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2112 goto do_unallocated;
2113 } else {
2114 unsupported_encoding(s, insn);
2116 return;
2118 default:
2119 do_unallocated:
2120 unallocated_encoding(s);
2121 return;
2124 switch (btype_mod) {
2125 case 0: /* BR */
2126 if (dc_isar_feature(aa64_bti, s)) {
2127 /* BR to {x16,x17} or !guard -> 1, else 3. */
2128 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2130 break;
2132 case 1: /* BLR */
2133 if (dc_isar_feature(aa64_bti, s)) {
2134 /* BLR sets BTYPE to 2, regardless of source guarded page. */
2135 set_btype(s, 2);
2137 break;
2139 default: /* RET or none of the above. */
2140 /* BTYPE will be set to 0 by normal end-of-insn processing. */
2141 break;
2144 s->base.is_jmp = DISAS_JUMP;
2147 /* Branches, exception generating and system instructions */
2148 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2150 switch (extract32(insn, 25, 7)) {
2151 case 0x0a: case 0x0b:
2152 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2153 disas_uncond_b_imm(s, insn);
2154 break;
2155 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2156 disas_comp_b_imm(s, insn);
2157 break;
2158 case 0x1b: case 0x5b: /* Test & branch (immediate) */
2159 disas_test_b_imm(s, insn);
2160 break;
2161 case 0x2a: /* Conditional branch (immediate) */
2162 disas_cond_b_imm(s, insn);
2163 break;
2164 case 0x6a: /* Exception generation / System */
2165 if (insn & (1 << 24)) {
2166 if (extract32(insn, 22, 2) == 0) {
2167 disas_system(s, insn);
2168 } else {
2169 unallocated_encoding(s);
2171 } else {
2172 disas_exc(s, insn);
2174 break;
2175 case 0x6b: /* Unconditional branch (register) */
2176 disas_uncond_b_reg(s, insn);
2177 break;
2178 default:
2179 unallocated_encoding(s);
2180 break;
2185 * Load/Store exclusive instructions are implemented by remembering
2186 * the value/address loaded, and seeing if these are the same
2187 * when the store is performed. This is not actually the architecturally
2188 * mandated semantics, but it works for typical guest code sequences
2189 * and avoids having to monitor regular stores.
2191 * The store exclusive uses the atomic cmpxchg primitives to avoid
2192 * races in multi-threaded linux-user and when MTTCG softmmu is
2193 * enabled.
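* For example, a typical guest sequence this scheme is intended to handle
* is a load-exclusive/store-exclusive retry loop such as:
*
*     retry:  ldaxr   x0, [x1]
*             add     x0, x0, #1
*             stlxr   w2, x0, [x1]
*             cbnz    w2, retry
*
* The store-exclusive then succeeds only if the value at the address is
* still the one observed by the load-exclusive.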
2195 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2196 TCGv_i64 addr, int size, bool is_pair)
2198 int idx = get_mem_index(s);
2199 MemOp memop = s->be_data;
2201 g_assert(size <= 3);
2202 if (is_pair) {
2203 g_assert(size >= 2);
2204 if (size == 2) {
2205 /* The pair must be single-copy atomic for the doubleword. */
2206 memop |= MO_64 | MO_ALIGN;
2207 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2208 if (s->be_data == MO_LE) {
2209 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2210 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2211 } else {
2212 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2213 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2215 } else {
2216 /* The pair must be single-copy atomic for *each* doubleword, not
2217 for the entire quadword; it must, however, be quadword aligned. */
2218 memop |= MO_64;
2219 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2220 memop | MO_ALIGN_16);
2222 TCGv_i64 addr2 = tcg_temp_new_i64();
2223 tcg_gen_addi_i64(addr2, addr, 8);
2224 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2225 tcg_temp_free_i64(addr2);
2227 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2228 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2230 } else {
2231 memop |= size | MO_ALIGN;
2232 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2233 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2235 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2238 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2239 TCGv_i64 addr, int size, int is_pair)
2241 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2242 *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2243 *     [addr] = {Rt};
2244 *     if (is_pair) {
2245 *         [addr + datasize] = {Rt2};
2246 *     }
2247 *     {Rd} = 0;
2248 * } else {
2249 *     {Rd} = 1;
2250 * }
2251 * env->exclusive_addr = -1;
2252 */
2253 TCGLabel *fail_label = gen_new_label();
2254 TCGLabel *done_label = gen_new_label();
2255 TCGv_i64 tmp;
2257 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2259 tmp = tcg_temp_new_i64();
2260 if (is_pair) {
2261 if (size == 2) {
2262 if (s->be_data == MO_LE) {
2263 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2264 } else {
2265 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2267 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2268 cpu_exclusive_val, tmp,
2269 get_mem_index(s),
2270 MO_64 | MO_ALIGN | s->be_data);
2271 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2272 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2273 if (!HAVE_CMPXCHG128) {
2274 gen_helper_exit_atomic(cpu_env);
2275 s->base.is_jmp = DISAS_NORETURN;
2276 } else if (s->be_data == MO_LE) {
2277 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2278 cpu_exclusive_addr,
2279 cpu_reg(s, rt),
2280 cpu_reg(s, rt2));
2281 } else {
2282 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2283 cpu_exclusive_addr,
2284 cpu_reg(s, rt),
2285 cpu_reg(s, rt2));
2287 } else if (s->be_data == MO_LE) {
2288 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2289 cpu_reg(s, rt), cpu_reg(s, rt2));
2290 } else {
2291 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2292 cpu_reg(s, rt), cpu_reg(s, rt2));
2294 } else {
2295 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2296 cpu_reg(s, rt), get_mem_index(s),
2297 size | MO_ALIGN | s->be_data);
2298 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2300 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2301 tcg_temp_free_i64(tmp);
2302 tcg_gen_br(done_label);
2304 gen_set_label(fail_label);
2305 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2306 gen_set_label(done_label);
2307 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2310 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2311 int rn, int size)
2313 TCGv_i64 tcg_rs = cpu_reg(s, rs);
2314 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2315 int memidx = get_mem_index(s);
2316 TCGv_i64 clean_addr;
2318 if (rn == 31) {
2319 gen_check_sp_alignment(s);
2321 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2322 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2323 size | MO_ALIGN | s->be_data);
2326 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2327 int rn, int size)
2329 TCGv_i64 s1 = cpu_reg(s, rs);
2330 TCGv_i64 s2 = cpu_reg(s, rs + 1);
2331 TCGv_i64 t1 = cpu_reg(s, rt);
2332 TCGv_i64 t2 = cpu_reg(s, rt + 1);
2333 TCGv_i64 clean_addr;
2334 int memidx = get_mem_index(s);
2336 if (rn == 31) {
2337 gen_check_sp_alignment(s);
2339 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2341 if (size == 2) {
2342 TCGv_i64 cmp = tcg_temp_new_i64();
2343 TCGv_i64 val = tcg_temp_new_i64();
2345 if (s->be_data == MO_LE) {
2346 tcg_gen_concat32_i64(val, t1, t2);
2347 tcg_gen_concat32_i64(cmp, s1, s2);
2348 } else {
2349 tcg_gen_concat32_i64(val, t2, t1);
2350 tcg_gen_concat32_i64(cmp, s2, s1);
2353 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2354 MO_64 | MO_ALIGN | s->be_data);
2355 tcg_temp_free_i64(val);
2357 if (s->be_data == MO_LE) {
2358 tcg_gen_extr32_i64(s1, s2, cmp);
2359 } else {
2360 tcg_gen_extr32_i64(s2, s1, cmp);
2362 tcg_temp_free_i64(cmp);
2363 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2364 if (HAVE_CMPXCHG128) {
2365 TCGv_i32 tcg_rs = tcg_const_i32(rs);
2366 if (s->be_data == MO_LE) {
2367 gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2368 clean_addr, t1, t2);
2369 } else {
2370 gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2371 clean_addr, t1, t2);
2373 tcg_temp_free_i32(tcg_rs);
2374 } else {
2375 gen_helper_exit_atomic(cpu_env);
2376 s->base.is_jmp = DISAS_NORETURN;
2378 } else {
2379 TCGv_i64 d1 = tcg_temp_new_i64();
2380 TCGv_i64 d2 = tcg_temp_new_i64();
2381 TCGv_i64 a2 = tcg_temp_new_i64();
2382 TCGv_i64 c1 = tcg_temp_new_i64();
2383 TCGv_i64 c2 = tcg_temp_new_i64();
2384 TCGv_i64 zero = tcg_const_i64(0);
2386 /* Load the two words, in memory order. */
2387 tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2388 MO_64 | MO_ALIGN_16 | s->be_data);
2389 tcg_gen_addi_i64(a2, clean_addr, 8);
2390 tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2392 /* Compare the two words, also in memory order. */
2393 tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2394 tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2395 tcg_gen_and_i64(c2, c2, c1);
2397 /* If compare equal, write back new data, else write back old data. */
2398 tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2399 tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2400 tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2401 tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2402 tcg_temp_free_i64(a2);
2403 tcg_temp_free_i64(c1);
2404 tcg_temp_free_i64(c2);
2405 tcg_temp_free_i64(zero);
2407 /* Write back the data from memory to Rs. */
2408 tcg_gen_mov_i64(s1, d1);
2409 tcg_gen_mov_i64(s2, d2);
2410 tcg_temp_free_i64(d1);
2411 tcg_temp_free_i64(d2);
2415 /* Compute the "Sixty-Four" (SF) flag: true if the register being transferred
2416 * is 64 bits wide. This logic is derived from the ARMv8 specs for LDR
2417 * (shared decode for all encodings). */
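/* For example: LDRB Wt (size 0, unsigned) and LDRSB Wt (opc 11) target a
 * 32-bit register and so give SF = 0, while LDR Xt (size 3) and LDRSB Xt
 * (opc 10) target a 64-bit register and give SF = 1.
 */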
2418 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2420 int opc0 = extract32(opc, 0, 1);
2421 int regsize;
2423 if (is_signed) {
2424 regsize = opc0 ? 32 : 64;
2425 } else {
2426 regsize = size == 3 ? 64 : 32;
2428 return regsize == 64;
2431 /* Load/store exclusive
2433 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
2434 * +-----+-------------+----+---+----+------+----+-------+------+------+
2435 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
2436 * +-----+-------------+----+---+----+------+----+-------+------+------+
2438 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2439 * L: 0 -> store, 1 -> load
2440 * o2: 0 -> exclusive, 1 -> not
2441 * o1: 0 -> single register, 1 -> register pair
2442 * o0: 1 -> load-acquire/store-release, 0 -> not
2444 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2446 int rt = extract32(insn, 0, 5);
2447 int rn = extract32(insn, 5, 5);
2448 int rt2 = extract32(insn, 10, 5);
2449 int rs = extract32(insn, 16, 5);
2450 int is_lasr = extract32(insn, 15, 1);
2451 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2452 int size = extract32(insn, 30, 2);
2453 TCGv_i64 clean_addr;
2455 switch (o2_L_o1_o0) {
2456 case 0x0: /* STXR */
2457 case 0x1: /* STLXR */
2458 if (rn == 31) {
2459 gen_check_sp_alignment(s);
2461 if (is_lasr) {
2462 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2464 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2465 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2466 return;
2468 case 0x4: /* LDXR */
2469 case 0x5: /* LDAXR */
2470 if (rn == 31) {
2471 gen_check_sp_alignment(s);
2473 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2474 s->is_ldex = true;
2475 gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2476 if (is_lasr) {
2477 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2479 return;
2481 case 0x8: /* STLLR */
2482 if (!dc_isar_feature(aa64_lor, s)) {
2483 break;
2485 /* StoreLORelease is the same as Store-Release for QEMU. */
2486 /* fall through */
2487 case 0x9: /* STLR */
2488 /* Generate ISS for non-exclusive accesses including LASR. */
2489 if (rn == 31) {
2490 gen_check_sp_alignment(s);
2492 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2493 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2494 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2495 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2496 return;
2498 case 0xc: /* LDLAR */
2499 if (!dc_isar_feature(aa64_lor, s)) {
2500 break;
2502 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
2503 /* fall through */
2504 case 0xd: /* LDAR */
2505 /* Generate ISS for non-exclusive accesses including LASR. */
2506 if (rn == 31) {
2507 gen_check_sp_alignment(s);
2509 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2510 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2511 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2512 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2513 return;
2515 case 0x2: case 0x3: /* CASP / STXP */
2516 if (size & 2) { /* STXP / STLXP */
2517 if (rn == 31) {
2518 gen_check_sp_alignment(s);
2520 if (is_lasr) {
2521 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2523 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2524 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2525 return;
2527 if (rt2 == 31
2528 && ((rt | rs) & 1) == 0
2529 && dc_isar_feature(aa64_atomics, s)) {
2530 /* CASP / CASPL */
2531 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2532 return;
2534 break;
2536 case 0x6: case 0x7: /* CASPA / LDXP */
2537 if (size & 2) { /* LDXP / LDAXP */
2538 if (rn == 31) {
2539 gen_check_sp_alignment(s);
2541 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2542 s->is_ldex = true;
2543 gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2544 if (is_lasr) {
2545 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2547 return;
2549 if (rt2 == 31
2550 && ((rt | rs) & 1) == 0
2551 && dc_isar_feature(aa64_atomics, s)) {
2552 /* CASPA / CASPAL */
2553 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2554 return;
2556 break;
2558 case 0xa: /* CAS */
2559 case 0xb: /* CASL */
2560 case 0xe: /* CASA */
2561 case 0xf: /* CASAL */
2562 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2563 gen_compare_and_swap(s, rs, rt, rn, size);
2564 return;
2566 break;
2568 unallocated_encoding(s);
2572 * Load register (literal)
2574 * 31 30 29 27 26 25 24 23 5 4 0
2575 * +-----+-------+---+-----+-------------------+-------+
2576 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
2577 * +-----+-------+---+-----+-------------------+-------+
2579 * V: 1 -> vector (simd/fp)
2580 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2581 * 10-> 32 bit signed, 11 -> prefetch
2582 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2584 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2586 int rt = extract32(insn, 0, 5);
2587 int64_t imm = sextract32(insn, 5, 19) << 2;
2588 bool is_vector = extract32(insn, 26, 1);
2589 int opc = extract32(insn, 30, 2);
2590 bool is_signed = false;
2591 int size = 2;
2592 TCGv_i64 tcg_rt, clean_addr;
2594 if (is_vector) {
2595 if (opc == 3) {
2596 unallocated_encoding(s);
2597 return;
2599 size = 2 + opc;
2600 if (!fp_access_check(s)) {
2601 return;
2603 } else {
2604 if (opc == 3) {
2605 /* PRFM (literal) : prefetch */
2606 return;
2608 size = 2 + extract32(opc, 0, 1);
2609 is_signed = extract32(opc, 1, 1);
2612 tcg_rt = cpu_reg(s, rt);
2614 clean_addr = tcg_const_i64(s->pc_curr + imm);
2615 if (is_vector) {
2616 do_fp_ld(s, rt, clean_addr, size);
2617 } else {
2618 /* Only unsigned 32bit loads target 32bit registers. */
2619 bool iss_sf = opc != 0;
2621 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2622 true, rt, iss_sf, false);
2624 tcg_temp_free_i64(clean_addr);
2628 * LDNP (Load Pair - non-temporal hint)
2629 * LDP (Load Pair - non vector)
2630 * LDPSW (Load Pair Signed Word - non vector)
2631 * STNP (Store Pair - non-temporal hint)
2632 * STP (Store Pair - non vector)
2633 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2634 * LDP (Load Pair of SIMD&FP)
2635 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2636 * STP (Store Pair of SIMD&FP)
2638 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
2639 * +-----+-------+---+---+-------+---+-----------------------------+
2640 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
2641 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2643 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
2644 * LDPSW 01
2645 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2646 * V: 0 -> GPR, 1 -> Vector
2647 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2648 * 10 -> signed offset, 11 -> pre-index
2649 * L: 0 -> Store, 1 -> Load
2651 * Rt, Rt2 = GPR or SIMD registers to be transferred
2652 * Rn = general purpose register containing address
2653 * imm7 = signed offset, scaled by the transfer size (4, 8 or 16 bytes)
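* e.g. STP X0, X1, [SP, #16] encodes imm7 = 2 (16 bytes / 8), and
* LDP Q0, Q1, [X2, #-32] encodes imm7 = -2 (32 bytes / 16).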
2655 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2657 int rt = extract32(insn, 0, 5);
2658 int rn = extract32(insn, 5, 5);
2659 int rt2 = extract32(insn, 10, 5);
2660 uint64_t offset = sextract64(insn, 15, 7);
2661 int index = extract32(insn, 23, 2);
2662 bool is_vector = extract32(insn, 26, 1);
2663 bool is_load = extract32(insn, 22, 1);
2664 int opc = extract32(insn, 30, 2);
2666 bool is_signed = false;
2667 bool postindex = false;
2668 bool wback = false;
2670 TCGv_i64 clean_addr, dirty_addr;
2672 int size;
2674 if (opc == 3) {
2675 unallocated_encoding(s);
2676 return;
2679 if (is_vector) {
2680 size = 2 + opc;
2681 } else {
2682 size = 2 + extract32(opc, 1, 1);
2683 is_signed = extract32(opc, 0, 1);
2684 if (!is_load && is_signed) {
2685 unallocated_encoding(s);
2686 return;
2690 switch (index) {
2691 case 1: /* post-index */
2692 postindex = true;
2693 wback = true;
2694 break;
2695 case 0:
2696 /* signed offset with "non-temporal" hint. Since we don't emulate
2697 * caches we don't care about hints to the cache system about
2698 * data access patterns, and handle this identically to plain
2699 * signed offset.
2701 if (is_signed) {
2702 /* There is no non-temporal-hint version of LDPSW */
2703 unallocated_encoding(s);
2704 return;
2706 postindex = false;
2707 break;
2708 case 2: /* signed offset, rn not updated */
2709 postindex = false;
2710 break;
2711 case 3: /* pre-index */
2712 postindex = false;
2713 wback = true;
2714 break;
2717 if (is_vector && !fp_access_check(s)) {
2718 return;
2721 offset <<= size;
2723 if (rn == 31) {
2724 gen_check_sp_alignment(s);
2727 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2728 if (!postindex) {
2729 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2731 clean_addr = clean_data_tbi(s, dirty_addr);
2733 if (is_vector) {
2734 if (is_load) {
2735 do_fp_ld(s, rt, clean_addr, size);
2736 } else {
2737 do_fp_st(s, rt, clean_addr, size);
2739 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2740 if (is_load) {
2741 do_fp_ld(s, rt2, clean_addr, size);
2742 } else {
2743 do_fp_st(s, rt2, clean_addr, size);
2745 } else {
2746 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2747 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2749 if (is_load) {
2750 TCGv_i64 tmp = tcg_temp_new_i64();
2752 /* Do not modify tcg_rt before recognizing any exception
2753 * from the second load.
2755 do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2756 false, 0, false, false);
2757 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2758 do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2759 false, 0, false, false);
2761 tcg_gen_mov_i64(tcg_rt, tmp);
2762 tcg_temp_free_i64(tmp);
2763 } else {
2764 do_gpr_st(s, tcg_rt, clean_addr, size,
2765 false, 0, false, false);
2766 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2767 do_gpr_st(s, tcg_rt2, clean_addr, size,
2768 false, 0, false, false);
2772 if (wback) {
2773 if (postindex) {
2774 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2776 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2781 * Load/store (immediate post-indexed)
2782 * Load/store (immediate pre-indexed)
2783 * Load/store (unscaled immediate)
2785 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2786 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2787 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2788 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2790 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2791 * 10 -> unprivileged
2792 * V = 0 -> non-vector
2793 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2794 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2796 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2797 int opc,
2798 int size,
2799 int rt,
2800 bool is_vector)
2802 int rn = extract32(insn, 5, 5);
2803 int imm9 = sextract32(insn, 12, 9);
2804 int idx = extract32(insn, 10, 2);
2805 bool is_signed = false;
2806 bool is_store = false;
2807 bool is_extended = false;
2808 bool is_unpriv = (idx == 2);
2809 bool iss_valid = !is_vector;
2810 bool post_index;
2811 bool writeback;
2813 TCGv_i64 clean_addr, dirty_addr;
2815 if (is_vector) {
2816 size |= (opc & 2) << 1;
2817 if (size > 4 || is_unpriv) {
2818 unallocated_encoding(s);
2819 return;
2821 is_store = ((opc & 1) == 0);
2822 if (!fp_access_check(s)) {
2823 return;
2825 } else {
2826 if (size == 3 && opc == 2) {
2827 /* PRFM - prefetch */
2828 if (idx != 0) {
2829 unallocated_encoding(s);
2830 return;
2832 return;
2834 if (opc == 3 && size > 1) {
2835 unallocated_encoding(s);
2836 return;
2838 is_store = (opc == 0);
2839 is_signed = extract32(opc, 1, 1);
2840 is_extended = (size < 3) && extract32(opc, 0, 1);
2843 switch (idx) {
2844 case 0:
2845 case 2:
2846 post_index = false;
2847 writeback = false;
2848 break;
2849 case 1:
2850 post_index = true;
2851 writeback = true;
2852 break;
2853 case 3:
2854 post_index = false;
2855 writeback = true;
2856 break;
2857 default:
2858 g_assert_not_reached();
2861 if (rn == 31) {
2862 gen_check_sp_alignment(s);
2865 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2866 if (!post_index) {
2867 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2869 clean_addr = clean_data_tbi(s, dirty_addr);
2871 if (is_vector) {
2872 if (is_store) {
2873 do_fp_st(s, rt, clean_addr, size);
2874 } else {
2875 do_fp_ld(s, rt, clean_addr, size);
2877 } else {
2878 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2879 int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2880 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2882 if (is_store) {
2883 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
2884 iss_valid, rt, iss_sf, false);
2885 } else {
2886 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
2887 is_signed, is_extended, memidx,
2888 iss_valid, rt, iss_sf, false);
2892 if (writeback) {
2893 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2894 if (post_index) {
2895 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2897 tcg_gen_mov_i64(tcg_rn, dirty_addr);
2902 * Load/store (register offset)
2904 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2905 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2906 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
2907 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2909 * For non-vector:
2910 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2911 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2912 * For vector:
2913 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2914 * opc<0>: 0 -> store, 1 -> load
2915 * V: 1 -> vector/simd
2916 * opt: extend encoding (see DecodeRegExtend)
2917 * S: if S=1 then scale (essentially index by sizeof(size))
2918 * Rt: register to transfer into/out of
2919 * Rn: address register or SP for base
2920 * Rm: offset register or ZR for offset
2922 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2923 int opc,
2924 int size,
2925 int rt,
2926 bool is_vector)
2928 int rn = extract32(insn, 5, 5);
2929 int shift = extract32(insn, 12, 1);
2930 int rm = extract32(insn, 16, 5);
2931 int opt = extract32(insn, 13, 3);
2932 bool is_signed = false;
2933 bool is_store = false;
2934 bool is_extended = false;
2936 TCGv_i64 tcg_rm, clean_addr, dirty_addr;
2938 if (extract32(opt, 1, 1) == 0) {
2939 unallocated_encoding(s);
2940 return;
2943 if (is_vector) {
2944 size |= (opc & 2) << 1;
2945 if (size > 4) {
2946 unallocated_encoding(s);
2947 return;
2949 is_store = !extract32(opc, 0, 1);
2950 if (!fp_access_check(s)) {
2951 return;
2953 } else {
2954 if (size == 3 && opc == 2) {
2955 /* PRFM - prefetch */
2956 return;
2958 if (opc == 3 && size > 1) {
2959 unallocated_encoding(s);
2960 return;
2962 is_store = (opc == 0);
2963 is_signed = extract32(opc, 1, 1);
2964 is_extended = (size < 3) && extract32(opc, 0, 1);
2967 if (rn == 31) {
2968 gen_check_sp_alignment(s);
2970 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2972 tcg_rm = read_cpu_reg(s, rm, 1);
2973 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2975 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
2976 clean_addr = clean_data_tbi(s, dirty_addr);
2978 if (is_vector) {
2979 if (is_store) {
2980 do_fp_st(s, rt, clean_addr, size);
2981 } else {
2982 do_fp_ld(s, rt, clean_addr, size);
2984 } else {
2985 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2986 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2987 if (is_store) {
2988 do_gpr_st(s, tcg_rt, clean_addr, size,
2989 true, rt, iss_sf, false);
2990 } else {
2991 do_gpr_ld(s, tcg_rt, clean_addr, size,
2992 is_signed, is_extended,
2993 true, rt, iss_sf, false);
2999 * Load/store (unsigned immediate)
3001 * 31 30 29 27 26 25 24 23 22 21 10 9 5
3002 * +----+-------+---+-----+-----+------------+-------+------+
3003 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
3004 * +----+-------+---+-----+-----+------------+-------+------+
3006 * For non-vector:
3007 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3008 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3009 * For vector:
3010 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3011 * opc<0>: 0 -> store, 1 -> load
3012 * Rn: base address register (inc SP)
3013 * Rt: target register
3015 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3016 int opc,
3017 int size,
3018 int rt,
3019 bool is_vector)
3021 int rn = extract32(insn, 5, 5);
3022 unsigned int imm12 = extract32(insn, 10, 12);
3023 unsigned int offset;
3025 TCGv_i64 clean_addr, dirty_addr;
3027 bool is_store;
3028 bool is_signed = false;
3029 bool is_extended = false;
3031 if (is_vector) {
3032 size |= (opc & 2) << 1;
3033 if (size > 4) {
3034 unallocated_encoding(s);
3035 return;
3037 is_store = !extract32(opc, 0, 1);
3038 if (!fp_access_check(s)) {
3039 return;
3041 } else {
3042 if (size == 3 && opc == 2) {
3043 /* PRFM - prefetch */
3044 return;
3046 if (opc == 3 && size > 1) {
3047 unallocated_encoding(s);
3048 return;
3050 is_store = (opc == 0);
3051 is_signed = extract32(opc, 1, 1);
3052 is_extended = (size < 3) && extract32(opc, 0, 1);
3055 if (rn == 31) {
3056 gen_check_sp_alignment(s);
3058 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3059 offset = imm12 << size;
3060 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3061 clean_addr = clean_data_tbi(s, dirty_addr);
3063 if (is_vector) {
3064 if (is_store) {
3065 do_fp_st(s, rt, clean_addr, size);
3066 } else {
3067 do_fp_ld(s, rt, clean_addr, size);
3069 } else {
3070 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3071 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3072 if (is_store) {
3073 do_gpr_st(s, tcg_rt, clean_addr, size,
3074 true, rt, iss_sf, false);
3075 } else {
3076 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
3077 true, rt, iss_sf, false);
3082 /* Atomic memory operations
3084 * 31 30 27 26 24 22 21 16 15 12 10 5 0
3085 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3086 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
3087 * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
3089 * Rt: the result register
3090 * Rn: base address or SP
3091 * Rs: the source register for the operation
3092 * V: vector flag (always 0 as of v8.3)
3093 * A: acquire flag
3094 * R: release flag
3096 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3097 int size, int rt, bool is_vector)
3099 int rs = extract32(insn, 16, 5);
3100 int rn = extract32(insn, 5, 5);
3101 int o3_opc = extract32(insn, 12, 4);
3102 bool r = extract32(insn, 22, 1);
3103 bool a = extract32(insn, 23, 1);
3104 TCGv_i64 tcg_rs, clean_addr;
3105 AtomicThreeOpFn *fn;
3107 if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3108 unallocated_encoding(s);
3109 return;
3111 switch (o3_opc) {
3112 case 000: /* LDADD */
3113 fn = tcg_gen_atomic_fetch_add_i64;
3114 break;
3115 case 001: /* LDCLR */
3116 fn = tcg_gen_atomic_fetch_and_i64;
3117 break;
3118 case 002: /* LDEOR */
3119 fn = tcg_gen_atomic_fetch_xor_i64;
3120 break;
3121 case 003: /* LDSET */
3122 fn = tcg_gen_atomic_fetch_or_i64;
3123 break;
3124 case 004: /* LDSMAX */
3125 fn = tcg_gen_atomic_fetch_smax_i64;
3126 break;
3127 case 005: /* LDSMIN */
3128 fn = tcg_gen_atomic_fetch_smin_i64;
3129 break;
3130 case 006: /* LDUMAX */
3131 fn = tcg_gen_atomic_fetch_umax_i64;
3132 break;
3133 case 007: /* LDUMIN */
3134 fn = tcg_gen_atomic_fetch_umin_i64;
3135 break;
3136 case 010: /* SWP */
3137 fn = tcg_gen_atomic_xchg_i64;
3138 break;
3139 case 014: /* LDAPR, LDAPRH, LDAPRB */
3140 if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3141 rs != 31 || a != 1 || r != 0) {
3142 unallocated_encoding(s);
3143 return;
3145 break;
3146 default:
3147 unallocated_encoding(s);
3148 return;
3151 if (rn == 31) {
3152 gen_check_sp_alignment(s);
3154 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
3156 if (o3_opc == 014) {
3158 * LDAPR* are a special case because they are a simple load, not a
3159 * fetch-and-do-something op.
3160 * The architectural consistency requirements here are weaker than
3161 * full load-acquire (we only need "load-acquire processor consistent"),
3162 * but we choose to implement them as full LDAQ.
3164 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false,
3165 true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3166 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3167 return;
3170 tcg_rs = read_cpu_reg(s, rs, true);
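/* LDCLR is architecturally mem = mem AND NOT(Rs), so complement the
 * operand here and implement it with the plain fetch-and-AND op below.
 */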
3172 if (o3_opc == 1) { /* LDCLR */
3173 tcg_gen_not_i64(tcg_rs, tcg_rs);
3176 /* The tcg atomic primitives are all full barriers. Therefore we
3177 * can ignore the Acquire and Release bits of this instruction.
3179 fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
3180 s->be_data | size | MO_ALIGN);
3184 * PAC memory operations
3186 * 31 30 27 26 24 22 21 12 11 10 5 0
3187 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3188 * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt |
3189 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3191 * Rt: the result register
3192 * Rn: base address or SP
3193 * V: vector flag (always 0 as of v8.3)
3194 * M: clear for key DA, set for key DB
3195 * W: pre-indexing flag
3196 * S: sign for imm9.
3198 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3199 int size, int rt, bool is_vector)
3201 int rn = extract32(insn, 5, 5);
3202 bool is_wback = extract32(insn, 11, 1);
3203 bool use_key_a = !extract32(insn, 23, 1);
3204 int offset;
3205 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3207 if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3208 unallocated_encoding(s);
3209 return;
3212 if (rn == 31) {
3213 gen_check_sp_alignment(s);
3215 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3217 if (s->pauth_active) {
3218 if (use_key_a) {
3219 gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3220 } else {
3221 gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3225 /* Form the 10-bit signed, scaled offset. */
3226 offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3227 offset = sextract32(offset << size, 0, 10 + size);
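/* With size == 3 (the only size accepted above) this gives a byte offset
 * in the range -4096..+4088 in steps of 8; e.g. S = 1, imm9 = 0 encodes
 * an offset of -4096.
 */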
3228 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3230 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3231 clean_addr = clean_data_tbi(s, dirty_addr);
3233 tcg_rt = cpu_reg(s, rt);
3234 do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
3235 /* extend */ false, /* iss_valid */ !is_wback,
3236 /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3238 if (is_wback) {
3239 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3244 * LDAPR/STLR (unscaled immediate)
3246 * 31 30 24 22 21 12 10 5 0
3247 * +------+-------------+-----+---+--------+-----+----+-----+
3248 * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt |
3249 * +------+-------------+-----+---+--------+-----+----+-----+
3251 * Rt: source or destination register
3252 * Rn: base register
3253 * imm9: unscaled immediate offset
3254 * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3255 * size: size of load/store
3257 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3259 int rt = extract32(insn, 0, 5);
3260 int rn = extract32(insn, 5, 5);
3261 int offset = sextract32(insn, 12, 9);
3262 int opc = extract32(insn, 22, 2);
3263 int size = extract32(insn, 30, 2);
3264 TCGv_i64 clean_addr, dirty_addr;
3265 bool is_store = false;
3266 bool is_signed = false;
3267 bool extend = false;
3268 bool iss_sf;
3270 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3271 unallocated_encoding(s);
3272 return;
3275 switch (opc) {
3276 case 0: /* STLURB */
3277 is_store = true;
3278 break;
3279 case 1: /* LDAPUR* */
3280 break;
3281 case 2: /* LDAPURS* 64-bit variant */
3282 if (size == 3) {
3283 unallocated_encoding(s);
3284 return;
3286 is_signed = true;
3287 break;
3288 case 3: /* LDAPURS* 32-bit variant */
3289 if (size > 1) {
3290 unallocated_encoding(s);
3291 return;
3293 is_signed = true;
3294 extend = true; /* zero-extend 32->64 after signed load */
3295 break;
3296 default:
3297 g_assert_not_reached();
3300 iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3302 if (rn == 31) {
3303 gen_check_sp_alignment(s);
3306 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3307 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3308 clean_addr = clean_data_tbi(s, dirty_addr);
3310 if (is_store) {
3311 /* Store-Release semantics */
3312 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3313 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, iss_sf, true);
3314 } else {
3316 * Load-AcquirePC semantics; we implement as the slightly more
3317 * restrictive Load-Acquire.
3319 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend,
3320 true, rt, iss_sf, true);
3321 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3325 /* Load/store register (all forms) */
3326 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3328 int rt = extract32(insn, 0, 5);
3329 int opc = extract32(insn, 22, 2);
3330 bool is_vector = extract32(insn, 26, 1);
3331 int size = extract32(insn, 30, 2);
3333 switch (extract32(insn, 24, 2)) {
3334 case 0:
3335 if (extract32(insn, 21, 1) == 0) {
3336 /* Load/store register (unscaled immediate)
3337 * Load/store immediate pre/post-indexed
3338 * Load/store register unprivileged
3340 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3341 return;
3343 switch (extract32(insn, 10, 2)) {
3344 case 0:
3345 disas_ldst_atomic(s, insn, size, rt, is_vector);
3346 return;
3347 case 2:
3348 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3349 return;
3350 default:
3351 disas_ldst_pac(s, insn, size, rt, is_vector);
3352 return;
3354 break;
3355 case 1:
3356 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3357 return;
3359 unallocated_encoding(s);
3362 /* AdvSIMD load/store multiple structures
3364 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
3365 * +---+---+---------------+---+-------------+--------+------+------+------+
3366 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
3367 * +---+---+---------------+---+-------------+--------+------+------+------+
3369 * AdvSIMD load/store multiple structures (post-indexed)
3371 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
3372 * +---+---+---------------+---+---+---------+--------+------+------+------+
3373 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
3374 * +---+---+---------------+---+---+---------+--------+------+------+------+
3376 * Rt: first (or only) SIMD&FP register to be transferred
3377 * Rn: base address or SP
3378 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3380 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3382 int rt = extract32(insn, 0, 5);
3383 int rn = extract32(insn, 5, 5);
3384 int rm = extract32(insn, 16, 5);
3385 int size = extract32(insn, 10, 2);
3386 int opcode = extract32(insn, 12, 4);
3387 bool is_store = !extract32(insn, 22, 1);
3388 bool is_postidx = extract32(insn, 23, 1);
3389 bool is_q = extract32(insn, 30, 1);
3390 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3391 MemOp endian = s->be_data;
3393 int ebytes; /* bytes per element */
3394 int elements; /* elements per vector */
3395 int rpt; /* num iterations */
3396 int selem; /* structure elements */
3397 int r;
3399 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3400 unallocated_encoding(s);
3401 return;
3404 if (!is_postidx && rm != 0) {
3405 unallocated_encoding(s);
3406 return;
3409 /* From the shared decode logic */
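/* For example: opcode 0 is LD4/ST4 (selem 4), opcode 8 is LD2/ST2
 * (selem 2), opcode 7 is LD1/ST1 of a single register, and opcodes
 * 2/6/0xa are LD1/ST1 of four/three/two registers (rpt > 1).
 */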
3410 switch (opcode) {
3411 case 0x0:
3412 rpt = 1;
3413 selem = 4;
3414 break;
3415 case 0x2:
3416 rpt = 4;
3417 selem = 1;
3418 break;
3419 case 0x4:
3420 rpt = 1;
3421 selem = 3;
3422 break;
3423 case 0x6:
3424 rpt = 3;
3425 selem = 1;
3426 break;
3427 case 0x7:
3428 rpt = 1;
3429 selem = 1;
3430 break;
3431 case 0x8:
3432 rpt = 1;
3433 selem = 2;
3434 break;
3435 case 0xa:
3436 rpt = 2;
3437 selem = 1;
3438 break;
3439 default:
3440 unallocated_encoding(s);
3441 return;
3444 if (size == 3 && !is_q && selem != 1) {
3445 /* reserved */
3446 unallocated_encoding(s);
3447 return;
3450 if (!fp_access_check(s)) {
3451 return;
3454 if (rn == 31) {
3455 gen_check_sp_alignment(s);
3458 /* For our purposes, bytes are always little-endian. */
3459 if (size == 0) {
3460 endian = MO_LE;
3463 /* Consecutive little-endian elements from a single register
3464 * can be promoted to a larger little-endian operation.
3466 if (selem == 1 && endian == MO_LE) {
3467 size = 3;
3469 ebytes = 1 << size;
3470 elements = (is_q ? 16 : 8) / ebytes;
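/* For example, LD1 {Vt.16B} (size 0, selem 1) is performed here as two
 * 64-bit element accesses rather than sixteen single-byte accesses.
 */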
3472 tcg_rn = cpu_reg_sp(s, rn);
3473 clean_addr = clean_data_tbi(s, tcg_rn);
3474 tcg_ebytes = tcg_const_i64(ebytes);
3476 for (r = 0; r < rpt; r++) {
3477 int e;
3478 for (e = 0; e < elements; e++) {
3479 int xs;
3480 for (xs = 0; xs < selem; xs++) {
3481 int tt = (rt + r + xs) % 32;
3482 if (is_store) {
3483 do_vec_st(s, tt, e, clean_addr, size, endian);
3484 } else {
3485 do_vec_ld(s, tt, e, clean_addr, size, endian);
3487 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3491 tcg_temp_free_i64(tcg_ebytes);
3493 if (!is_store) {
3494 /* For non-quad operations, setting a slice of the low
3495 * 64 bits of the register clears the high 64 bits (in
3496 * the ARM ARM pseudocode this is implicit in the fact
3497 * that 'rval' is a 64 bit wide variable).
3498 * For quad operations, we might still need to zero the
3499 * high bits of the SVE register.
3501 for (r = 0; r < rpt * selem; r++) {
3502 int tt = (rt + r) % 32;
3503 clear_vec_high(s, is_q, tt);
3507 if (is_postidx) {
3508 if (rm == 31) {
3509 tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
3510 } else {
3511 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3516 /* AdvSIMD load/store single structure
3518 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3519 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3520 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
3521 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3523 * AdvSIMD load/store single structure (post-indexed)
3525 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3526 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3527 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
3528 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3530 * Rt: first (or only) SIMD&FP register to be transferred
3531 * Rn: base address or SP
3532 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3533 * index = encoded in Q:S:size dependent on size
3535 * lane_size = encoded in R, opc
3536 * transfer width = encoded in opc, S, size
3538 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3540 int rt = extract32(insn, 0, 5);
3541 int rn = extract32(insn, 5, 5);
3542 int rm = extract32(insn, 16, 5);
3543 int size = extract32(insn, 10, 2);
3544 int S = extract32(insn, 12, 1);
3545 int opc = extract32(insn, 13, 3);
3546 int R = extract32(insn, 21, 1);
3547 int is_load = extract32(insn, 22, 1);
3548 int is_postidx = extract32(insn, 23, 1);
3549 int is_q = extract32(insn, 30, 1);
3551 int scale = extract32(opc, 1, 2);
3552 int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3553 bool replicate = false;
3554 int index = is_q << 3 | S << 2 | size;
3555 int ebytes, xs;
3556 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3558 if (extract32(insn, 31, 1)) {
3559 unallocated_encoding(s);
3560 return;
3562 if (!is_postidx && rm != 0) {
3563 unallocated_encoding(s);
3564 return;
3567 switch (scale) {
3568 case 3:
3569 if (!is_load || S) {
3570 unallocated_encoding(s);
3571 return;
3573 scale = size;
3574 replicate = true;
3575 break;
3576 case 0:
3577 break;
3578 case 1:
3579 if (extract32(size, 0, 1)) {
3580 unallocated_encoding(s);
3581 return;
3583 index >>= 1;
3584 break;
3585 case 2:
3586 if (extract32(size, 1, 1)) {
3587 unallocated_encoding(s);
3588 return;
3590 if (!extract32(size, 0, 1)) {
3591 index >>= 2;
3592 } else {
3593 if (S) {
3594 unallocated_encoding(s);
3595 return;
3597 index >>= 3;
3598 scale = 3;
3600 break;
3601 default:
3602 g_assert_not_reached();
3605 if (!fp_access_check(s)) {
3606 return;
3609 ebytes = 1 << scale;
3611 if (rn == 31) {
3612 gen_check_sp_alignment(s);
3615 tcg_rn = cpu_reg_sp(s, rn);
3616 clean_addr = clean_data_tbi(s, tcg_rn);
3617 tcg_ebytes = tcg_const_i64(ebytes);
3619 for (xs = 0; xs < selem; xs++) {
3620 if (replicate) {
3621 /* Load and replicate to all elements */
3622 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3624 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
3625 get_mem_index(s), s->be_data + scale);
3626 tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3627 (is_q + 1) * 8, vec_full_reg_size(s),
3628 tcg_tmp);
3629 tcg_temp_free_i64(tcg_tmp);
3630 } else {
3631 /* Load/store one element per register */
3632 if (is_load) {
3633 do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
3634 } else {
3635 do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
3638 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3639 rt = (rt + 1) % 32;
3641 tcg_temp_free_i64(tcg_ebytes);
3643 if (is_postidx) {
3644 if (rm == 31) {
3645 tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
3646 } else {
3647 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3652 /* Loads and stores */
3653 static void disas_ldst(DisasContext *s, uint32_t insn)
3655 switch (extract32(insn, 24, 6)) {
3656 case 0x08: /* Load/store exclusive */
3657 disas_ldst_excl(s, insn);
3658 break;
3659 case 0x18: case 0x1c: /* Load register (literal) */
3660 disas_ld_lit(s, insn);
3661 break;
3662 case 0x28: case 0x29:
3663 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3664 disas_ldst_pair(s, insn);
3665 break;
3666 case 0x38: case 0x39:
3667 case 0x3c: case 0x3d: /* Load/store register (all forms) */
3668 disas_ldst_reg(s, insn);
3669 break;
3670 case 0x0c: /* AdvSIMD load/store multiple structures */
3671 disas_ldst_multiple_struct(s, insn);
3672 break;
3673 case 0x0d: /* AdvSIMD load/store single structure */
3674 disas_ldst_single_struct(s, insn);
3675 break;
3676 case 0x19: /* LDAPR/STLR (unscaled immediate) */
3677 if (extract32(insn, 10, 2) != 0 ||
3678 extract32(insn, 21, 1) != 0) {
3679 unallocated_encoding(s);
3680 break;
3682 disas_ldst_ldapr_stlr(s, insn);
3683 break;
3684 default:
3685 unallocated_encoding(s);
3686 break;
3690 /* PC-rel. addressing
3691 * 31 30 29 28 24 23 5 4 0
3692 * +----+-------+-----------+-------------------+------+
3693 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
3694 * +----+-------+-----------+-------------------+------+
3696 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3698 unsigned int page, rd;
3699 uint64_t base;
3700 uint64_t offset;
3702 page = extract32(insn, 31, 1);
3703 /* SignExtend(immhi:immlo) -> offset */
3704 offset = sextract64(insn, 5, 19);
3705 offset = offset << 2 | extract32(insn, 29, 2);
3706 rd = extract32(insn, 0, 5);
3707 base = s->pc_curr;
3709 if (page) {
3710 /* ADRP (page based) */
3711 base &= ~0xfff;
3712 offset <<= 12;
3715 tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3719 * Add/subtract (immediate)
3721 * 31 30 29 28 24 23 22 21 10 9 5 4 0
3722 * +--+--+--+-----------+-----+-------------+-----+-----+
3723 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
3724 * +--+--+--+-----------+-----+-------------+-----+-----+
3726 * sf: 0 -> 32bit, 1 -> 64bit
3727 * op: 0 -> add , 1 -> sub
3728 * S: 1 -> set flags
3729 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3731 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3733 int rd = extract32(insn, 0, 5);
3734 int rn = extract32(insn, 5, 5);
3735 uint64_t imm = extract32(insn, 10, 12);
3736 int shift = extract32(insn, 22, 2);
3737 bool setflags = extract32(insn, 29, 1);
3738 bool sub_op = extract32(insn, 30, 1);
3739 bool is_64bit = extract32(insn, 31, 1);
3741 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3742 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3743 TCGv_i64 tcg_result;
3745 switch (shift) {
3746 case 0x0:
3747 break;
3748 case 0x1:
3749 imm <<= 12;
3750 break;
3751 default:
3752 unallocated_encoding(s);
3753 return;
3756 tcg_result = tcg_temp_new_i64();
3757 if (!setflags) {
3758 if (sub_op) {
3759 tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3760 } else {
3761 tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3763 } else {
3764 TCGv_i64 tcg_imm = tcg_const_i64(imm);
3765 if (sub_op) {
3766 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3767 } else {
3768 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3770 tcg_temp_free_i64(tcg_imm);
3773 if (is_64bit) {
3774 tcg_gen_mov_i64(tcg_rd, tcg_result);
3775 } else {
3776 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3779 tcg_temp_free_i64(tcg_result);
3782 /* The input should be a value in the bottom e bits (with higher
3783 * bits zero); returns that value replicated into every element
3784 * of size e in a 64 bit integer.
3785 */
3786 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3787 {
3788     assert(e != 0);
3789     while (e < 64) {
3790         mask |= mask << e;
3791         e *= 2;
3792     }
3793     return mask;
3794 }
3796 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
3797 static inline uint64_t bitmask64(unsigned int length)
3798 {
3799     assert(length > 0 && length <= 64);
3800     return ~0ULL >> (64 - length);
3801 }
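/* For example, bitmask64(3) == 0x7 and bitfield_replicate(0x3, 4) ==
 * 0x3333333333333333; bitfield_replicate(0x1, 2) == 0x5555555555555555.
 */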
3803 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3804 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3805 * value (ie should cause a guest UNDEF exception), and true if they are
3806 * valid, in which case the decoded bit pattern is written to result.
3808 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3809 unsigned int imms, unsigned int immr)
3811 uint64_t mask;
3812 unsigned e, levels, s, r;
3813 int len;
3815 assert(immn < 2 && imms < 64 && immr < 64);
3817 /* The bit patterns we create here are 64 bit patterns which
3818 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3819 * 64 bits each. Each element contains the same value: a run
3820 * of between 1 and e-1 non-zero bits, rotated within the
3821 * element by between 0 and e-1 bits.
3823 * The element size and run length are encoded into immn (1 bit)
3824 * and imms (6 bits) as follows:
3825 * 64 bit elements: immn = 1, imms = <length of run - 1>
3826 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3827 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3828 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3829 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3830 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3831 * Notice that immn = 0, imms = 11111x is the only combination
3832 * not covered by one of the above options; this is reserved.
3833 * Further, <length of run - 1> all-ones is a reserved pattern.
3835 * In all cases the rotation is by immr % e (and immr is 6 bits).
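*
* A couple of worked examples:
*   immn = 0, imms = 0b111100, immr = 0 -> e = 2, one set bit per
*     element, giving 0x5555555555555555;
*   immn = 1, imms = 0b000111, immr = 4 -> e = 64, a run of 8 ones
*     rotated right by 4, giving 0xf00000000000000f.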
3838 /* First determine the element size */
3839 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3840 if (len < 1) {
3841 /* This is the immn == 0, imms == 0b11111x case */
3842 return false;
3844 e = 1 << len;
3846 levels = e - 1;
3847 s = imms & levels;
3848 r = immr & levels;
3850 if (s == levels) {
3851 /* <length of run - 1> mustn't be all-ones. */
3852 return false;
3855 /* Create the value of one element: s+1 set bits rotated
3856 * by r within the element (which is e bits wide)...
3858 mask = bitmask64(s + 1);
3859 if (r) {
3860 mask = (mask >> r) | (mask << (e - r));
3861 mask &= bitmask64(e);
3863 /* ...then replicate the element over the whole 64 bit value */
3864 mask = bitfield_replicate(mask, e);
3865 *result = mask;
3866 return true;
3869 /* Logical (immediate)
3870 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3871 * +----+-----+-------------+---+------+------+------+------+
3872 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
3873 * +----+-----+-------------+---+------+------+------+------+
3875 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3877 unsigned int sf, opc, is_n, immr, imms, rn, rd;
3878 TCGv_i64 tcg_rd, tcg_rn;
3879 uint64_t wmask;
3880 bool is_and = false;
3882 sf = extract32(insn, 31, 1);
3883 opc = extract32(insn, 29, 2);
3884 is_n = extract32(insn, 22, 1);
3885 immr = extract32(insn, 16, 6);
3886 imms = extract32(insn, 10, 6);
3887 rn = extract32(insn, 5, 5);
3888 rd = extract32(insn, 0, 5);
3890 if (!sf && is_n) {
3891 unallocated_encoding(s);
3892 return;
3895 if (opc == 0x3) { /* ANDS */
3896 tcg_rd = cpu_reg(s, rd);
3897 } else {
3898 tcg_rd = cpu_reg_sp(s, rd);
3900 tcg_rn = cpu_reg(s, rn);
3902 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3903 /* some immediate field values are reserved */
3904 unallocated_encoding(s);
3905 return;
3908 if (!sf) {
3909 wmask &= 0xffffffff;
3912 switch (opc) {
3913 case 0x3: /* ANDS */
3914 case 0x0: /* AND */
3915 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3916 is_and = true;
3917 break;
3918 case 0x1: /* ORR */
3919 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3920 break;
3921 case 0x2: /* EOR */
3922 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3923 break;
3924 default:
3925 assert(FALSE); /* must handle all above */
3926 break;
3929 if (!sf && !is_and) {
3930 /* zero extend final result; we know we can skip this for AND
3931 * since the immediate had the high 32 bits clear.
3933 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3936 if (opc == 3) { /* ANDS */
3937 gen_logic_CC(sf, tcg_rd);
3942 * Move wide (immediate)
3944 * 31 30 29 28 23 22 21 20 5 4 0
3945 * +--+-----+-------------+-----+----------------+------+
3946 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
3947 * +--+-----+-------------+-----+----------------+------+
3949 * sf: 0 -> 32 bit, 1 -> 64 bit
3950 * opc: 00 -> N, 10 -> Z, 11 -> K
3951 * hw: shift amount / 16 (0 or 16; 32 and 48 only when sf is 1)
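* e.g. MOVZ Xd, #0x1234, LSL #16 gives 0x12340000; MOVN inverts the
* shifted value, so MOVN Xd, #0 gives all-ones; MOVK replaces only the
* selected 16-bit field and leaves the rest of Xd intact.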
3953 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3955 int rd = extract32(insn, 0, 5);
3956 uint64_t imm = extract32(insn, 5, 16);
3957 int sf = extract32(insn, 31, 1);
3958 int opc = extract32(insn, 29, 2);
3959 int pos = extract32(insn, 21, 2) << 4;
3960 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3961 TCGv_i64 tcg_imm;
3963 if (!sf && (pos >= 32)) {
3964 unallocated_encoding(s);
3965 return;
3968 switch (opc) {
3969 case 0: /* MOVN */
3970 case 2: /* MOVZ */
3971 imm <<= pos;
3972 if (opc == 0) {
3973 imm = ~imm;
3975 if (!sf) {
3976 imm &= 0xffffffffu;
3978 tcg_gen_movi_i64(tcg_rd, imm);
3979 break;
3980 case 3: /* MOVK */
3981 tcg_imm = tcg_const_i64(imm);
3982 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3983 tcg_temp_free_i64(tcg_imm);
3984 if (!sf) {
3985 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3987 break;
3988 default:
3989 unallocated_encoding(s);
3990 break;
3994 /* Bitfield
3995 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3996 * +----+-----+-------------+---+------+------+------+------+
3997 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
3998 * +----+-----+-------------+---+------+------+------+------+
4000 static void disas_bitfield(DisasContext *s, uint32_t insn)
4002 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
4003 TCGv_i64 tcg_rd, tcg_tmp;
4005 sf = extract32(insn, 31, 1);
4006 opc = extract32(insn, 29, 2);
4007 n = extract32(insn, 22, 1);
4008 ri = extract32(insn, 16, 6);
4009 si = extract32(insn, 10, 6);
4010 rn = extract32(insn, 5, 5);
4011 rd = extract32(insn, 0, 5);
4012 bitsize = sf ? 64 : 32;
4014 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
4015 unallocated_encoding(s);
4016 return;
4019 tcg_rd = cpu_reg(s, rd);
4021 /* Suppress the zero-extend for !sf. Since RI and SI are constrained
4022 to be smaller than bitsize, we'll never reference data outside the
4023 low 32-bits anyway. */
4024 tcg_tmp = read_cpu_reg(s, rn, 1);
4026 /* Recognize simple(r) extractions. */
4027 if (si >= ri) {
4028 /* Wd<s-r:0> = Wn<s:r> */
4029 len = (si - ri) + 1;
4030 if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
4031 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4032 goto done;
4033 } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
4034 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4035 return;
4037 /* opc == 1, BFXIL fall through to deposit */
4038 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4039 pos = 0;
4040 } else {
4041 /* Handle the ri > si case with a deposit
4042 * Wd<32+s-r,32-r> = Wn<s:0>
4044 len = si + 1;
4045 pos = (bitsize - ri) & (bitsize - 1);
4048 if (opc == 0 && len < ri) {
4049 /* SBFM: sign extend the destination field from len to fill
4050 the balance of the word. Let the deposit below insert all
4051 of those sign bits. */
4052 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4053 len = ri;
4056 if (opc == 1) { /* BFM, BFXIL */
4057 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4058 } else {
4059 /* SBFM or UBFM: We start with zero, and we haven't modified
4060 any bits outside bitsize, therefore the zero-extension
4061 below is unneeded. */
4062 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4063 return;
4066 done:
4067 if (!sf) { /* zero extend final result */
4068 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4072 /* Extract
4073 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
4074 * +----+------+-------------+---+----+------+--------+------+------+
4075 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
4076 * +----+------+-------------+---+----+------+--------+------+------+
4078 static void disas_extract(DisasContext *s, uint32_t insn)
4080 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
4082 sf = extract32(insn, 31, 1);
4083 n = extract32(insn, 22, 1);
4084 rm = extract32(insn, 16, 5);
4085 imm = extract32(insn, 10, 6);
4086 rn = extract32(insn, 5, 5);
4087 rd = extract32(insn, 0, 5);
4088 op21 = extract32(insn, 29, 2);
4089 op0 = extract32(insn, 21, 1);
4090 bitsize = sf ? 64 : 32;
4092 if (sf != n || op21 || op0 || imm >= bitsize) {
4093 unallocated_encoding(s);
4094 } else {
4095 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4097 tcg_rd = cpu_reg(s, rd);
4099 if (unlikely(imm == 0)) {
4100 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4101 * so an extract from bit 0 is a special case.
4103 if (sf) {
4104 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
4105 } else {
4106 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
4108 } else {
4109 tcg_rm = cpu_reg(s, rm);
4110 tcg_rn = cpu_reg(s, rn);
4112 if (sf) {
4113 /* Specialization to ROR happens in EXTRACT2. */
4114 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
4115 } else {
4116 TCGv_i32 t0 = tcg_temp_new_i32();
4118 tcg_gen_extrl_i64_i32(t0, tcg_rm);
4119 if (rm == rn) {
4120 tcg_gen_rotri_i32(t0, t0, imm);
4121 } else {
4122 TCGv_i32 t1 = tcg_temp_new_i32();
4123 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4124 tcg_gen_extract2_i32(t0, t0, t1, imm);
4125 tcg_temp_free_i32(t1);
4127 tcg_gen_extu_i32_i64(tcg_rd, t0);
4128 tcg_temp_free_i32(t0);
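
/*
 * Illustrative sketch (not used by the decoder above): EXTR Xd, Xn, Xm, #imm
 * takes 64 bits starting at bit <imm> of the 128-bit concatenation Xn:Xm,
 * which is why extract2 fits and why Xn == Xm degenerates to ROR.
 * ref_extr64 is a hypothetical helper shown only for clarity.
 */
static inline uint64_t ref_extr64(uint64_t xn, uint64_t xm, unsigned imm)
{
    /* imm == 0 is special-cased to avoid an undefined 64-bit shift */
    return imm == 0 ? xm : (xm >> imm) | (xn << (64 - imm));
}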
4134 /* Data processing - immediate */
4135 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4137 switch (extract32(insn, 23, 6)) {
4138 case 0x20: case 0x21: /* PC-rel. addressing */
4139 disas_pc_rel_adr(s, insn);
4140 break;
4141 case 0x22: case 0x23: /* Add/subtract (immediate) */
4142 disas_add_sub_imm(s, insn);
4143 break;
4144 case 0x24: /* Logical (immediate) */
4145 disas_logic_imm(s, insn);
4146 break;
4147 case 0x25: /* Move wide (immediate) */
4148 disas_movw_imm(s, insn);
4149 break;
4150 case 0x26: /* Bitfield */
4151 disas_bitfield(s, insn);
4152 break;
4153 case 0x27: /* Extract */
4154 disas_extract(s, insn);
4155 break;
4156 default:
4157 unallocated_encoding(s);
4158 break;
4162 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4163 * Note that it is the caller's responsibility to ensure that the
4164 * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
4165 * ARM-mandated semantics for out-of-range shifts.
4167 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4168 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4170 switch (shift_type) {
4171 case A64_SHIFT_TYPE_LSL:
4172 tcg_gen_shl_i64(dst, src, shift_amount);
4173 break;
4174 case A64_SHIFT_TYPE_LSR:
4175 tcg_gen_shr_i64(dst, src, shift_amount);
4176 break;
4177 case A64_SHIFT_TYPE_ASR:
4178 if (!sf) {
4179 tcg_gen_ext32s_i64(dst, src);
4181 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4182 break;
4183 case A64_SHIFT_TYPE_ROR:
4184 if (sf) {
4185 tcg_gen_rotr_i64(dst, src, shift_amount);
4186 } else {
4187 TCGv_i32 t0, t1;
4188 t0 = tcg_temp_new_i32();
4189 t1 = tcg_temp_new_i32();
4190 tcg_gen_extrl_i64_i32(t0, src);
4191 tcg_gen_extrl_i64_i32(t1, shift_amount);
4192 tcg_gen_rotr_i32(t0, t0, t1);
4193 tcg_gen_extu_i32_i64(dst, t0);
4194 tcg_temp_free_i32(t0);
4195 tcg_temp_free_i32(t1);
4197 break;
4198 default:
4199 assert(FALSE); /* all shift types should be handled */
4200 break;
4203 if (!sf) { /* zero extend final result */
4204 tcg_gen_ext32u_i64(dst, dst);
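
/*
 * Illustrative sketch (not used by the translator): the 32-bit ROR case
 * above, computed with 32-bit host arithmetic and zero-extended into the
 * 64-bit result, mirroring the generated TCG ops.  ref_ror32 is a
 * hypothetical helper.
 */
static inline uint64_t ref_ror32(uint32_t value, unsigned amount)
{
    amount &= 31;   /* the caller guarantees range; masked here for safety */
    return amount ? (uint32_t)((value >> amount) | (value << (32 - amount)))
                  : value;
}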
4208 /* Shift a TCGv src by immediate, put result in dst.
4209 * The shift amount must be in range (this should always be true as the
4210 * relevant instructions will UNDEF on bad shift immediates).
4212 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4213 enum a64_shift_type shift_type, unsigned int shift_i)
4215 assert(shift_i < (sf ? 64 : 32));
4217 if (shift_i == 0) {
4218 tcg_gen_mov_i64(dst, src);
4219 } else {
4220 TCGv_i64 shift_const;
4222 shift_const = tcg_const_i64(shift_i);
4223 shift_reg(dst, src, sf, shift_type, shift_const);
4224 tcg_temp_free_i64(shift_const);
4228 /* Logical (shifted register)
4229 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4230 * +----+-----+-----------+-------+---+------+--------+------+------+
4231 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
4232 * +----+-----+-----------+-------+---+------+--------+------+------+
4234 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4236 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4237 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4239 sf = extract32(insn, 31, 1);
4240 opc = extract32(insn, 29, 2);
4241 shift_type = extract32(insn, 22, 2);
4242 invert = extract32(insn, 21, 1);
4243 rm = extract32(insn, 16, 5);
4244 shift_amount = extract32(insn, 10, 6);
4245 rn = extract32(insn, 5, 5);
4246 rd = extract32(insn, 0, 5);
4248 if (!sf && (shift_amount & (1 << 5))) {
4249 unallocated_encoding(s);
4250 return;
4253 tcg_rd = cpu_reg(s, rd);
4255 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4256 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4257 * register-register MOV and MVN, so it is worth special casing.
4259 tcg_rm = cpu_reg(s, rm);
4260 if (invert) {
4261 tcg_gen_not_i64(tcg_rd, tcg_rm);
4262 if (!sf) {
4263 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4265 } else {
4266 if (sf) {
4267 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4268 } else {
4269 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4272 return;
4275 tcg_rm = read_cpu_reg(s, rm, sf);
4277 if (shift_amount) {
4278 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4281 tcg_rn = cpu_reg(s, rn);
4283 switch (opc | (invert << 2)) {
4284 case 0: /* AND */
4285 case 3: /* ANDS */
4286 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4287 break;
4288 case 1: /* ORR */
4289 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4290 break;
4291 case 2: /* EOR */
4292 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4293 break;
4294 case 4: /* BIC */
4295 case 7: /* BICS */
4296 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4297 break;
4298 case 5: /* ORN */
4299 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4300 break;
4301 case 6: /* EON */
4302 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4303 break;
4304 default:
4305 assert(FALSE);
4306 break;
4309 if (!sf) {
4310 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4313 if (opc == 3) {
4314 gen_logic_CC(sf, tcg_rd);
4319 * Add/subtract (extended register)
4321 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
4322 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4323 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
4324 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4326 * sf: 0 -> 32bit, 1 -> 64bit
4327 * op: 0 -> add , 1 -> sub
4328 * S: 1 -> set flags
4329 * opt: 00
4330 * option: extension type (see DecodeRegExtend)
4331 * imm3: optional shift to Rm
4333 * Rd = Rn + LSL(extend(Rm), amount)
4335 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4337 int rd = extract32(insn, 0, 5);
4338 int rn = extract32(insn, 5, 5);
4339 int imm3 = extract32(insn, 10, 3);
4340 int option = extract32(insn, 13, 3);
4341 int rm = extract32(insn, 16, 5);
4342 int opt = extract32(insn, 22, 2);
4343 bool setflags = extract32(insn, 29, 1);
4344 bool sub_op = extract32(insn, 30, 1);
4345 bool sf = extract32(insn, 31, 1);
4347 TCGv_i64 tcg_rm, tcg_rn; /* temps */
4348 TCGv_i64 tcg_rd;
4349 TCGv_i64 tcg_result;
4351 if (imm3 > 4 || opt != 0) {
4352 unallocated_encoding(s);
4353 return;
4356 /* non-flag setting ops may use SP */
4357 if (!setflags) {
4358 tcg_rd = cpu_reg_sp(s, rd);
4359 } else {
4360 tcg_rd = cpu_reg(s, rd);
4362 tcg_rn = read_cpu_reg_sp(s, rn, sf);
4364 tcg_rm = read_cpu_reg(s, rm, sf);
4365 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4367 tcg_result = tcg_temp_new_i64();
4369 if (!setflags) {
4370 if (sub_op) {
4371 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4372 } else {
4373 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4375 } else {
4376 if (sub_op) {
4377 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4378 } else {
4379 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4383 if (sf) {
4384 tcg_gen_mov_i64(tcg_rd, tcg_result);
4385 } else {
4386 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4389 tcg_temp_free_i64(tcg_result);
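
/*
 * Illustrative sketch (not used by the decoder above): the extend-then-shift
 * operand applied to Rm (cf. ext_and_shift_reg and DecodeRegExtend).
 * option<2> selects signed vs unsigned, option<1:0> the source width
 * (8/16/32/64 bits), and the extended value is shifted left by imm3 (0..4).
 * ref_extend_reg is a hypothetical helper demonstrating the semantics only.
 */
static inline uint64_t ref_extend_reg(uint64_t rm, unsigned option,
                                      unsigned imm3)
{
    bool is_signed = option & 4;
    unsigned bits = 8 << (option & 3);
    uint64_t value = bits < 64 ? rm & ((UINT64_C(1) << bits) - 1) : rm;

    if (is_signed && bits < 64 && (value & (UINT64_C(1) << (bits - 1)))) {
        value |= ~((UINT64_C(1) << bits) - 1);   /* sign-extend */
    }
    return value << imm3;
}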
4393 * Add/subtract (shifted register)
4395 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4396 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4397 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
4398 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4400 * sf: 0 -> 32bit, 1 -> 64bit
4401 * op: 0 -> add , 1 -> sub
4402 * S: 1 -> set flags
4403 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4404 * imm6: Shift amount to apply to Rm before the add/sub
4406 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4408 int rd = extract32(insn, 0, 5);
4409 int rn = extract32(insn, 5, 5);
4410 int imm6 = extract32(insn, 10, 6);
4411 int rm = extract32(insn, 16, 5);
4412 int shift_type = extract32(insn, 22, 2);
4413 bool setflags = extract32(insn, 29, 1);
4414 bool sub_op = extract32(insn, 30, 1);
4415 bool sf = extract32(insn, 31, 1);
4417 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4418 TCGv_i64 tcg_rn, tcg_rm;
4419 TCGv_i64 tcg_result;
4421 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4422 unallocated_encoding(s);
4423 return;
4426 tcg_rn = read_cpu_reg(s, rn, sf);
4427 tcg_rm = read_cpu_reg(s, rm, sf);
4429 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4431 tcg_result = tcg_temp_new_i64();
4433 if (!setflags) {
4434 if (sub_op) {
4435 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4436 } else {
4437 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4439 } else {
4440 if (sub_op) {
4441 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4442 } else {
4443 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4447 if (sf) {
4448 tcg_gen_mov_i64(tcg_rd, tcg_result);
4449 } else {
4450 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4453 tcg_temp_free_i64(tcg_result);
4456 /* Data-processing (3 source)
4458 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
4459 * +--+------+-----------+------+------+----+------+------+------+
4460 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
4461 * +--+------+-----------+------+------+----+------+------+------+
4463 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4465 int rd = extract32(insn, 0, 5);
4466 int rn = extract32(insn, 5, 5);
4467 int ra = extract32(insn, 10, 5);
4468 int rm = extract32(insn, 16, 5);
4469 int op_id = (extract32(insn, 29, 3) << 4) |
4470 (extract32(insn, 21, 3) << 1) |
4471 extract32(insn, 15, 1);
4472 bool sf = extract32(insn, 31, 1);
4473 bool is_sub = extract32(op_id, 0, 1);
4474 bool is_high = extract32(op_id, 2, 1);
4475 bool is_signed = false;
4476 TCGv_i64 tcg_op1;
4477 TCGv_i64 tcg_op2;
4478 TCGv_i64 tcg_tmp;
4480 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4481 switch (op_id) {
4482 case 0x42: /* SMADDL */
4483 case 0x43: /* SMSUBL */
4484 case 0x44: /* SMULH */
4485 is_signed = true;
4486 break;
4487 case 0x0: /* MADD (32bit) */
4488 case 0x1: /* MSUB (32bit) */
4489 case 0x40: /* MADD (64bit) */
4490 case 0x41: /* MSUB (64bit) */
4491 case 0x4a: /* UMADDL */
4492 case 0x4b: /* UMSUBL */
4493 case 0x4c: /* UMULH */
4494 break;
4495 default:
4496 unallocated_encoding(s);
4497 return;
4500 if (is_high) {
4501 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4502 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4503 TCGv_i64 tcg_rn = cpu_reg(s, rn);
4504 TCGv_i64 tcg_rm = cpu_reg(s, rm);
4506 if (is_signed) {
4507 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4508 } else {
4509 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4512 tcg_temp_free_i64(low_bits);
4513 return;
4516 tcg_op1 = tcg_temp_new_i64();
4517 tcg_op2 = tcg_temp_new_i64();
4518 tcg_tmp = tcg_temp_new_i64();
4520 if (op_id < 0x42) {
4521 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4522 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4523 } else {
4524 if (is_signed) {
4525 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4526 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4527 } else {
4528 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4529 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4533 if (ra == 31 && !is_sub) {
4534 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4535 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4536 } else {
4537 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4538 if (is_sub) {
4539 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4540 } else {
4541 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4545 if (!sf) {
4546 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4549 tcg_temp_free_i64(tcg_op1);
4550 tcg_temp_free_i64(tcg_op2);
4551 tcg_temp_free_i64(tcg_tmp);
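
/*
 * Illustrative sketch (not used by the decoder above): SMULH/UMULH return
 * the high 64 bits of the 128-bit product, which the code above obtains
 * from tcg_gen_muls2/mulu2.  This host-side equivalent assumes the compiler
 * provides __int128 (hence the CONFIG_INT128 guard); the ref_* helpers are
 * hypothetical.
 */
#ifdef CONFIG_INT128
static inline uint64_t ref_umulh(uint64_t a, uint64_t b)
{
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

static inline int64_t ref_smulh(int64_t a, int64_t b)
{
    return (int64_t)(((__int128)a * b) >> 64);
}
#endif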
4554 /* Add/subtract (with carry)
4555 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
4556 * +--+--+--+------------------------+------+-------------+------+-----+
4557 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd |
4558 * +--+--+--+------------------------+------+-------------+------+-----+
4561 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4563 unsigned int sf, op, setflags, rm, rn, rd;
4564 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4566 sf = extract32(insn, 31, 1);
4567 op = extract32(insn, 30, 1);
4568 setflags = extract32(insn, 29, 1);
4569 rm = extract32(insn, 16, 5);
4570 rn = extract32(insn, 5, 5);
4571 rd = extract32(insn, 0, 5);
4573 tcg_rd = cpu_reg(s, rd);
4574 tcg_rn = cpu_reg(s, rn);
4576 if (op) {
4577 tcg_y = new_tmp_a64(s);
4578 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4579 } else {
4580 tcg_y = cpu_reg(s, rm);
4583 if (setflags) {
4584 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4585 } else {
4586 gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4591 * Rotate right into flags
4592 * 31 30 29 21 15 10 5 4 0
4593 * +--+--+--+-----------------+--------+-----------+------+--+------+
4594 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask |
4595 * +--+--+--+-----------------+--------+-----------+------+--+------+
4597 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4599 int mask = extract32(insn, 0, 4);
4600 int o2 = extract32(insn, 4, 1);
4601 int rn = extract32(insn, 5, 5);
4602 int imm6 = extract32(insn, 15, 6);
4603 int sf_op_s = extract32(insn, 29, 3);
4604 TCGv_i64 tcg_rn;
4605 TCGv_i32 nzcv;
4607 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4608 unallocated_encoding(s);
4609 return;
4612 tcg_rn = read_cpu_reg(s, rn, 1);
4613 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4615 nzcv = tcg_temp_new_i32();
4616 tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4618 if (mask & 8) { /* N */
4619 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4621 if (mask & 4) { /* Z */
4622 tcg_gen_not_i32(cpu_ZF, nzcv);
4623 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4625 if (mask & 2) { /* C */
4626 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4628 if (mask & 1) { /* V */
4629 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4632 tcg_temp_free_i32(nzcv);
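
/*
 * Illustrative sketch (not used by the decoder above): RMIF rotates Xn
 * right by imm6 and inserts bits 3:0 of the result into N:Z:C:V under
 * control of mask, which is what the per-flag updates above implement.
 * ref_rmif_nzcv is a hypothetical helper working on a packed 4-bit NZCV.
 */
static inline unsigned ref_rmif_nzcv(uint64_t xn, unsigned imm6,
                                     unsigned mask, unsigned old_nzcv)
{
    uint64_t rot = imm6 ? (xn >> imm6) | (xn << (64 - imm6)) : xn;
    unsigned candidate = rot & 0xf;             /* N:Z:C:V in bits 3..0 */

    return (old_nzcv & ~mask) | (candidate & mask);
}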
4636 * Evaluate into flags
4637 * 31 30 29 21 15 14 10 5 4 0
4638 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4639 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask |
4640 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4642 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4644 int o3_mask = extract32(insn, 0, 5);
4645 int rn = extract32(insn, 5, 5);
4646 int o2 = extract32(insn, 15, 6);
4647 int sz = extract32(insn, 14, 1);
4648 int sf_op_s = extract32(insn, 29, 3);
4649 TCGv_i32 tmp;
4650 int shift;
4652 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4653 !dc_isar_feature(aa64_condm_4, s)) {
4654 unallocated_encoding(s);
4655 return;
4657 shift = sz ? 16 : 24; /* SETF16 or SETF8 */
4659 tmp = tcg_temp_new_i32();
4660 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4661 tcg_gen_shli_i32(cpu_NF, tmp, shift);
4662 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4663 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4664 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4665 tcg_temp_free_i32(tmp);
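
/*
 * Illustrative sketch (not used by the decoder above): SETF8 derives the
 * flags from the low byte of Wn as N = bit 7, Z = (byte == 0),
 * V = bit 8 XOR bit 7, with C unchanged; SETF16 does the same with
 * bits 15/16 and the low halfword.  ref_setf8_nzcv is a hypothetical helper
 * returning a packed NZCV value.
 */
static inline unsigned ref_setf8_nzcv(uint32_t wn, unsigned old_c)
{
    unsigned n = (wn >> 7) & 1;
    unsigned z = (wn & 0xff) == 0;
    unsigned v = ((wn >> 8) ^ (wn >> 7)) & 1;

    return (n << 3) | (z << 2) | ((old_c & 1) << 1) | v;
}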
4668 /* Conditional compare (immediate / register)
4669 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
4670 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4671 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
4672 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4673 * [1] y [0] [0]
4675 static void disas_cc(DisasContext *s, uint32_t insn)
4677 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4678 TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4679 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4680 DisasCompare c;
4682 if (!extract32(insn, 29, 1)) {
4683 unallocated_encoding(s);
4684 return;
4686 if (insn & (1 << 10 | 1 << 4)) {
4687 unallocated_encoding(s);
4688 return;
4690 sf = extract32(insn, 31, 1);
4691 op = extract32(insn, 30, 1);
4692 is_imm = extract32(insn, 11, 1);
4693 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4694 cond = extract32(insn, 12, 4);
4695 rn = extract32(insn, 5, 5);
4696 nzcv = extract32(insn, 0, 4);
4698 /* Set T0 = !COND. */
4699 tcg_t0 = tcg_temp_new_i32();
4700 arm_test_cc(&c, cond);
4701 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4702 arm_free_cc(&c);
4704 /* Load the arguments for the new comparison. */
4705 if (is_imm) {
4706 tcg_y = new_tmp_a64(s);
4707 tcg_gen_movi_i64(tcg_y, y);
4708 } else {
4709 tcg_y = cpu_reg(s, y);
4711 tcg_rn = cpu_reg(s, rn);
4713 /* Set the flags for the new comparison. */
4714 tcg_tmp = tcg_temp_new_i64();
4715 if (op) {
4716 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4717 } else {
4718 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4720 tcg_temp_free_i64(tcg_tmp);
4722 /* If COND was false, force the flags to #nzcv. Compute two masks
4723 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4724 * For tcg hosts that support ANDC, we can make do with just T1.
4725 * In either case, allow the tcg optimizer to delete any unused mask.
4727 tcg_t1 = tcg_temp_new_i32();
4728 tcg_t2 = tcg_temp_new_i32();
4729 tcg_gen_neg_i32(tcg_t1, tcg_t0);
4730 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4732 if (nzcv & 8) { /* N */
4733 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4734 } else {
4735 if (TCG_TARGET_HAS_andc_i32) {
4736 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4737 } else {
4738 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4741 if (nzcv & 4) { /* Z */
4742 if (TCG_TARGET_HAS_andc_i32) {
4743 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4744 } else {
4745 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4747 } else {
4748 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4750 if (nzcv & 2) { /* C */
4751 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4752 } else {
4753 if (TCG_TARGET_HAS_andc_i32) {
4754 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4755 } else {
4756 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4759 if (nzcv & 1) { /* V */
4760 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4761 } else {
4762 if (TCG_TARGET_HAS_andc_i32) {
4763 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4764 } else {
4765 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4768 tcg_temp_free_i32(tcg_t0);
4769 tcg_temp_free_i32(tcg_t1);
4770 tcg_temp_free_i32(tcg_t2);
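
/*
 * Illustrative sketch (not used by the decoder above) of the masking trick:
 * with T0 = !COND, T1 = -T0 and T2 = T0 - 1, OR-ing a flag with T1 forces
 * it to all-ones only when the condition failed, and AND-ing with T2
 * (or ANDC with T1) forces it to zero only when the condition failed; in
 * both cases the flag is left unchanged when the condition held.
 * ref_force_if_false is a hypothetical helper.
 */
static inline uint32_t ref_force_if_false(uint32_t flag, bool cond_true,
                                          bool force_to_ones)
{
    uint32_t t0 = cond_true ? 0 : 1;
    uint32_t t1 = -t0;              /* COND ? 0 : 0xffffffff */
    uint32_t t2 = t0 - 1;           /* COND ? 0xffffffff : 0 */

    return force_to_ones ? (flag | t1) : (flag & t2);
}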
4773 /* Conditional select
4774 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
4775 * +----+----+---+-----------------+------+------+-----+------+------+
4776 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
4777 * +----+----+---+-----------------+------+------+-----+------+------+
4779 static void disas_cond_select(DisasContext *s, uint32_t insn)
4781 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4782 TCGv_i64 tcg_rd, zero;
4783 DisasCompare64 c;
4785 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4786 /* S == 1 or op2<1> == 1 */
4787 unallocated_encoding(s);
4788 return;
4790 sf = extract32(insn, 31, 1);
4791 else_inv = extract32(insn, 30, 1);
4792 rm = extract32(insn, 16, 5);
4793 cond = extract32(insn, 12, 4);
4794 else_inc = extract32(insn, 10, 1);
4795 rn = extract32(insn, 5, 5);
4796 rd = extract32(insn, 0, 5);
4798 tcg_rd = cpu_reg(s, rd);
4800 a64_test_cc(&c, cond);
4801 zero = tcg_const_i64(0);
4803 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4804 /* CSET & CSETM. */
4805 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4806 if (else_inv) {
4807 tcg_gen_neg_i64(tcg_rd, tcg_rd);
4809 } else {
4810 TCGv_i64 t_true = cpu_reg(s, rn);
4811 TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4812 if (else_inv && else_inc) {
4813 tcg_gen_neg_i64(t_false, t_false);
4814 } else if (else_inv) {
4815 tcg_gen_not_i64(t_false, t_false);
4816 } else if (else_inc) {
4817 tcg_gen_addi_i64(t_false, t_false, 1);
4819 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4822 tcg_temp_free_i64(zero);
4823 a64_free_cc(&c);
4825 if (!sf) {
4826 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
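
/*
 * Illustrative sketch (not used by the decoder above): the conditional
 * select family, with else_inv taken from op and else_inc from op2<0>.
 * ref_csel is a hypothetical helper that only makes the mapping to
 * CSEL/CSINC/CSINV/CSNEG explicit.
 */
static inline uint64_t ref_csel(bool cond, uint64_t n, uint64_t m,
                                bool else_inv, bool else_inc)
{
    uint64_t f = m;

    if (else_inv && else_inc) {
        f = -f;                     /* CSNEG */
    } else if (else_inv) {
        f = ~f;                     /* CSINV */
    } else if (else_inc) {
        f = f + 1;                  /* CSINC */
    }
    return cond ? n : f;            /* plain CSEL when neither is set */
}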
4830 static void handle_clz(DisasContext *s, unsigned int sf,
4831 unsigned int rn, unsigned int rd)
4833 TCGv_i64 tcg_rd, tcg_rn;
4834 tcg_rd = cpu_reg(s, rd);
4835 tcg_rn = cpu_reg(s, rn);
4837 if (sf) {
4838 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4839 } else {
4840 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4841 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4842 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4843 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4844 tcg_temp_free_i32(tcg_tmp32);
4848 static void handle_cls(DisasContext *s, unsigned int sf,
4849 unsigned int rn, unsigned int rd)
4851 TCGv_i64 tcg_rd, tcg_rn;
4852 tcg_rd = cpu_reg(s, rd);
4853 tcg_rn = cpu_reg(s, rn);
4855 if (sf) {
4856 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4857 } else {
4858 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4859 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4860 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4861 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4862 tcg_temp_free_i32(tcg_tmp32);
4866 static void handle_rbit(DisasContext *s, unsigned int sf,
4867 unsigned int rn, unsigned int rd)
4869 TCGv_i64 tcg_rd, tcg_rn;
4870 tcg_rd = cpu_reg(s, rd);
4871 tcg_rn = cpu_reg(s, rn);
4873 if (sf) {
4874 gen_helper_rbit64(tcg_rd, tcg_rn);
4875 } else {
4876 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4877 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4878 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4879 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4880 tcg_temp_free_i32(tcg_tmp32);
4884 /* REV with sf==1, opcode==3 ("REV64") */
4885 static void handle_rev64(DisasContext *s, unsigned int sf,
4886 unsigned int rn, unsigned int rd)
4888 if (!sf) {
4889 unallocated_encoding(s);
4890 return;
4892 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4895 /* REV with sf==0, opcode==2
4896 * REV32 (sf==1, opcode==2)
4898 static void handle_rev32(DisasContext *s, unsigned int sf,
4899 unsigned int rn, unsigned int rd)
4901 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4903 if (sf) {
4904 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4905 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4907 /* bswap32_i64 requires zero high word */
4908 tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4909 tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4910 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4911 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4912 tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4914 tcg_temp_free_i64(tcg_tmp);
4915 } else {
4916 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4917 tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4921 /* REV16 (opcode==1) */
4922 static void handle_rev16(DisasContext *s, unsigned int sf,
4923 unsigned int rn, unsigned int rd)
4925 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4926 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4927 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4928 TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4930 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4931 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4932 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4933 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4934 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4936 tcg_temp_free_i64(mask);
4937 tcg_temp_free_i64(tcg_tmp);
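
/*
 * Illustrative sketch (not used by the translator): the mask-and-shift
 * sequence above swaps the bytes within each halfword.  The same
 * computation on a host integer, for the 64-bit form; ref_rev16_64 is a
 * hypothetical helper.
 */
static inline uint64_t ref_rev16_64(uint64_t x)
{
    const uint64_t mask = 0x00ff00ff00ff00ffULL;

    return ((x & mask) << 8) | ((x >> 8) & mask);
}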
4940 /* Data-processing (1 source)
4941 * 31 30 29 28 21 20 16 15 10 9 5 4 0
4942 * +----+---+---+-----------------+---------+--------+------+------+
4943 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
4944 * +----+---+---+-----------------+---------+--------+------+------+
4946 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4948 unsigned int sf, opcode, opcode2, rn, rd;
4949 TCGv_i64 tcg_rd;
4951 if (extract32(insn, 29, 1)) {
4952 unallocated_encoding(s);
4953 return;
4956 sf = extract32(insn, 31, 1);
4957 opcode = extract32(insn, 10, 6);
4958 opcode2 = extract32(insn, 16, 5);
4959 rn = extract32(insn, 5, 5);
4960 rd = extract32(insn, 0, 5);
4962 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
4964 switch (MAP(sf, opcode2, opcode)) {
4965 case MAP(0, 0x00, 0x00): /* RBIT */
4966 case MAP(1, 0x00, 0x00):
4967 handle_rbit(s, sf, rn, rd);
4968 break;
4969 case MAP(0, 0x00, 0x01): /* REV16 */
4970 case MAP(1, 0x00, 0x01):
4971 handle_rev16(s, sf, rn, rd);
4972 break;
4973 case MAP(0, 0x00, 0x02): /* REV/REV32 */
4974 case MAP(1, 0x00, 0x02):
4975 handle_rev32(s, sf, rn, rd);
4976 break;
4977 case MAP(1, 0x00, 0x03): /* REV64 */
4978 handle_rev64(s, sf, rn, rd);
4979 break;
4980 case MAP(0, 0x00, 0x04): /* CLZ */
4981 case MAP(1, 0x00, 0x04):
4982 handle_clz(s, sf, rn, rd);
4983 break;
4984 case MAP(0, 0x00, 0x05): /* CLS */
4985 case MAP(1, 0x00, 0x05):
4986 handle_cls(s, sf, rn, rd);
4987 break;
4988 case MAP(1, 0x01, 0x00): /* PACIA */
4989 if (s->pauth_active) {
4990 tcg_rd = cpu_reg(s, rd);
4991 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4992 } else if (!dc_isar_feature(aa64_pauth, s)) {
4993 goto do_unallocated;
4995 break;
4996 case MAP(1, 0x01, 0x01): /* PACIB */
4997 if (s->pauth_active) {
4998 tcg_rd = cpu_reg(s, rd);
4999 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5000 } else if (!dc_isar_feature(aa64_pauth, s)) {
5001 goto do_unallocated;
5003 break;
5004 case MAP(1, 0x01, 0x02): /* PACDA */
5005 if (s->pauth_active) {
5006 tcg_rd = cpu_reg(s, rd);
5007 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5008 } else if (!dc_isar_feature(aa64_pauth, s)) {
5009 goto do_unallocated;
5011 break;
5012 case MAP(1, 0x01, 0x03): /* PACDB */
5013 if (s->pauth_active) {
5014 tcg_rd = cpu_reg(s, rd);
5015 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5016 } else if (!dc_isar_feature(aa64_pauth, s)) {
5017 goto do_unallocated;
5019 break;
5020 case MAP(1, 0x01, 0x04): /* AUTIA */
5021 if (s->pauth_active) {
5022 tcg_rd = cpu_reg(s, rd);
5023 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5024 } else if (!dc_isar_feature(aa64_pauth, s)) {
5025 goto do_unallocated;
5027 break;
5028 case MAP(1, 0x01, 0x05): /* AUTIB */
5029 if (s->pauth_active) {
5030 tcg_rd = cpu_reg(s, rd);
5031 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5032 } else if (!dc_isar_feature(aa64_pauth, s)) {
5033 goto do_unallocated;
5035 break;
5036 case MAP(1, 0x01, 0x06): /* AUTDA */
5037 if (s->pauth_active) {
5038 tcg_rd = cpu_reg(s, rd);
5039 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5040 } else if (!dc_isar_feature(aa64_pauth, s)) {
5041 goto do_unallocated;
5043 break;
5044 case MAP(1, 0x01, 0x07): /* AUTDB */
5045 if (s->pauth_active) {
5046 tcg_rd = cpu_reg(s, rd);
5047 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5048 } else if (!dc_isar_feature(aa64_pauth, s)) {
5049 goto do_unallocated;
5051 break;
5052 case MAP(1, 0x01, 0x08): /* PACIZA */
5053 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5054 goto do_unallocated;
5055 } else if (s->pauth_active) {
5056 tcg_rd = cpu_reg(s, rd);
5057 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5059 break;
5060 case MAP(1, 0x01, 0x09): /* PACIZB */
5061 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5062 goto do_unallocated;
5063 } else if (s->pauth_active) {
5064 tcg_rd = cpu_reg(s, rd);
5065 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5067 break;
5068 case MAP(1, 0x01, 0x0a): /* PACDZA */
5069 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5070 goto do_unallocated;
5071 } else if (s->pauth_active) {
5072 tcg_rd = cpu_reg(s, rd);
5073 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5075 break;
5076 case MAP(1, 0x01, 0x0b): /* PACDZB */
5077 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5078 goto do_unallocated;
5079 } else if (s->pauth_active) {
5080 tcg_rd = cpu_reg(s, rd);
5081 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5083 break;
5084 case MAP(1, 0x01, 0x0c): /* AUTIZA */
5085 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5086 goto do_unallocated;
5087 } else if (s->pauth_active) {
5088 tcg_rd = cpu_reg(s, rd);
5089 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5091 break;
5092 case MAP(1, 0x01, 0x0d): /* AUTIZB */
5093 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5094 goto do_unallocated;
5095 } else if (s->pauth_active) {
5096 tcg_rd = cpu_reg(s, rd);
5097 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5099 break;
5100 case MAP(1, 0x01, 0x0e): /* AUTDZA */
5101 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5102 goto do_unallocated;
5103 } else if (s->pauth_active) {
5104 tcg_rd = cpu_reg(s, rd);
5105 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5107 break;
5108 case MAP(1, 0x01, 0x0f): /* AUTDZB */
5109 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5110 goto do_unallocated;
5111 } else if (s->pauth_active) {
5112 tcg_rd = cpu_reg(s, rd);
5113 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5115 break;
5116 case MAP(1, 0x01, 0x10): /* XPACI */
5117 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5118 goto do_unallocated;
5119 } else if (s->pauth_active) {
5120 tcg_rd = cpu_reg(s, rd);
5121 gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5123 break;
5124 case MAP(1, 0x01, 0x11): /* XPACD */
5125 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5126 goto do_unallocated;
5127 } else if (s->pauth_active) {
5128 tcg_rd = cpu_reg(s, rd);
5129 gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5131 break;
5132 default:
5133 do_unallocated:
5134 unallocated_encoding(s);
5135 break;
5138 #undef MAP
5141 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5142 unsigned int rm, unsigned int rn, unsigned int rd)
5144 TCGv_i64 tcg_n, tcg_m, tcg_rd;
5145 tcg_rd = cpu_reg(s, rd);
5147 if (!sf && is_signed) {
5148 tcg_n = new_tmp_a64(s);
5149 tcg_m = new_tmp_a64(s);
5150 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5151 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5152 } else {
5153 tcg_n = read_cpu_reg(s, rn, sf);
5154 tcg_m = read_cpu_reg(s, rm, sf);
5157 if (is_signed) {
5158 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5159 } else {
5160 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5163 if (!sf) { /* zero extend final result */
5164 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5168 /* LSLV, LSRV, ASRV, RORV */
5169 static void handle_shift_reg(DisasContext *s,
5170 enum a64_shift_type shift_type, unsigned int sf,
5171 unsigned int rm, unsigned int rn, unsigned int rd)
5173 TCGv_i64 tcg_shift = tcg_temp_new_i64();
5174 TCGv_i64 tcg_rd = cpu_reg(s, rd);
5175 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5177 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5178 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5179 tcg_temp_free_i64(tcg_shift);
5182 /* CRC32[BHWX], CRC32C[BHWX] */
5183 static void handle_crc32(DisasContext *s,
5184 unsigned int sf, unsigned int sz, bool crc32c,
5185 unsigned int rm, unsigned int rn, unsigned int rd)
5187 TCGv_i64 tcg_acc, tcg_val;
5188 TCGv_i32 tcg_bytes;
5190 if (!dc_isar_feature(aa64_crc32, s)
5191 || (sf == 1 && sz != 3)
5192 || (sf == 0 && sz == 3)) {
5193 unallocated_encoding(s);
5194 return;
5197 if (sz == 3) {
5198 tcg_val = cpu_reg(s, rm);
5199 } else {
5200 uint64_t mask;
5201 switch (sz) {
5202 case 0:
5203 mask = 0xFF;
5204 break;
5205 case 1:
5206 mask = 0xFFFF;
5207 break;
5208 case 2:
5209 mask = 0xFFFFFFFF;
5210 break;
5211 default:
5212 g_assert_not_reached();
5214 tcg_val = new_tmp_a64(s);
5215 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5218 tcg_acc = cpu_reg(s, rn);
5219 tcg_bytes = tcg_const_i32(1 << sz);
5221 if (crc32c) {
5222 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5223 } else {
5224 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5227 tcg_temp_free_i32(tcg_bytes);
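
/*
 * Illustrative sketch (not used by the decoder above): CRC32[C]{B,H,W,X}
 * consume 1 << sz bytes of Rm, so the switch above builds an 8/16/32-bit
 * mask and sz == 3 uses the register unmasked.  ref_crc32_val_mask is a
 * hypothetical helper showing the equivalent closed form.
 */
static inline uint64_t ref_crc32_val_mask(unsigned sz)
{
    return sz == 3 ? ~UINT64_C(0) : (UINT64_C(1) << (8 << sz)) - 1;
}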
5230 /* Data-processing (2 source)
5231 * 31 30 29 28 21 20 16 15 10 9 5 4 0
5232 * +----+---+---+-----------------+------+--------+------+------+
5233 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
5234 * +----+---+---+-----------------+------+--------+------+------+
5236 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5238 unsigned int sf, rm, opcode, rn, rd;
5239 sf = extract32(insn, 31, 1);
5240 rm = extract32(insn, 16, 5);
5241 opcode = extract32(insn, 10, 6);
5242 rn = extract32(insn, 5, 5);
5243 rd = extract32(insn, 0, 5);
5245 if (extract32(insn, 29, 1)) {
5246 unallocated_encoding(s);
5247 return;
5250 switch (opcode) {
5251 case 2: /* UDIV */
5252 handle_div(s, false, sf, rm, rn, rd);
5253 break;
5254 case 3: /* SDIV */
5255 handle_div(s, true, sf, rm, rn, rd);
5256 break;
5257 case 8: /* LSLV */
5258 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5259 break;
5260 case 9: /* LSRV */
5261 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5262 break;
5263 case 10: /* ASRV */
5264 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5265 break;
5266 case 11: /* RORV */
5267 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5268 break;
5269 case 12: /* PACGA */
5270 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5271 goto do_unallocated;
5273 gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5274 cpu_reg(s, rn), cpu_reg_sp(s, rm));
5275 break;
5276 case 16:
5277 case 17:
5278 case 18:
5279 case 19:
5280 case 20:
5281 case 21:
5282 case 22:
5283 case 23: /* CRC32 */
5285 int sz = extract32(opcode, 0, 2);
5286 bool crc32c = extract32(opcode, 2, 1);
5287 handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5288 break;
5290 default:
5291 do_unallocated:
5292 unallocated_encoding(s);
5293 break;
5298 * Data processing - register
5299 * 31 30 29 28 25 21 20 16 10 0
5300 * +--+---+--+---+-------+-----+-------+-------+---------+
5301 * | |op0| |op1| 1 0 1 | op2 | | op3 | |
5302 * +--+---+--+---+-------+-----+-------+-------+---------+
5304 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5306 int op0 = extract32(insn, 30, 1);
5307 int op1 = extract32(insn, 28, 1);
5308 int op2 = extract32(insn, 21, 4);
5309 int op3 = extract32(insn, 10, 6);
5311 if (!op1) {
5312 if (op2 & 8) {
5313 if (op2 & 1) {
5314 /* Add/sub (extended register) */
5315 disas_add_sub_ext_reg(s, insn);
5316 } else {
5317 /* Add/sub (shifted register) */
5318 disas_add_sub_reg(s, insn);
5320 } else {
5321 /* Logical (shifted register) */
5322 disas_logic_reg(s, insn);
5324 return;
5327 switch (op2) {
5328 case 0x0:
5329 switch (op3) {
5330 case 0x00: /* Add/subtract (with carry) */
5331 disas_adc_sbc(s, insn);
5332 break;
5334 case 0x01: /* Rotate right into flags */
5335 case 0x21:
5336 disas_rotate_right_into_flags(s, insn);
5337 break;
5339 case 0x02: /* Evaluate into flags */
5340 case 0x12:
5341 case 0x22:
5342 case 0x32:
5343 disas_evaluate_into_flags(s, insn);
5344 break;
5346 default:
5347 goto do_unallocated;
5349 break;
5351 case 0x2: /* Conditional compare */
5352 disas_cc(s, insn); /* both imm and reg forms */
5353 break;
5355 case 0x4: /* Conditional select */
5356 disas_cond_select(s, insn);
5357 break;
5359 case 0x6: /* Data-processing */
5360 if (op0) { /* (1 source) */
5361 disas_data_proc_1src(s, insn);
5362 } else { /* (2 source) */
5363 disas_data_proc_2src(s, insn);
5365 break;
5366 case 0x8 ... 0xf: /* (3 source) */
5367 disas_data_proc_3src(s, insn);
5368 break;
5370 default:
5371 do_unallocated:
5372 unallocated_encoding(s);
5373 break;
5377 static void handle_fp_compare(DisasContext *s, int size,
5378 unsigned int rn, unsigned int rm,
5379 bool cmp_with_zero, bool signal_all_nans)
5381 TCGv_i64 tcg_flags = tcg_temp_new_i64();
5382 TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
5384 if (size == MO_64) {
5385 TCGv_i64 tcg_vn, tcg_vm;
5387 tcg_vn = read_fp_dreg(s, rn);
5388 if (cmp_with_zero) {
5389 tcg_vm = tcg_const_i64(0);
5390 } else {
5391 tcg_vm = read_fp_dreg(s, rm);
5393 if (signal_all_nans) {
5394 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5395 } else {
5396 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5398 tcg_temp_free_i64(tcg_vn);
5399 tcg_temp_free_i64(tcg_vm);
5400 } else {
5401 TCGv_i32 tcg_vn = tcg_temp_new_i32();
5402 TCGv_i32 tcg_vm = tcg_temp_new_i32();
5404 read_vec_element_i32(s, tcg_vn, rn, 0, size);
5405 if (cmp_with_zero) {
5406 tcg_gen_movi_i32(tcg_vm, 0);
5407 } else {
5408 read_vec_element_i32(s, tcg_vm, rm, 0, size);
5411 switch (size) {
5412 case MO_32:
5413 if (signal_all_nans) {
5414 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5415 } else {
5416 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5418 break;
5419 case MO_16:
5420 if (signal_all_nans) {
5421 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5422 } else {
5423 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5425 break;
5426 default:
5427 g_assert_not_reached();
5430 tcg_temp_free_i32(tcg_vn);
5431 tcg_temp_free_i32(tcg_vm);
5434 tcg_temp_free_ptr(fpst);
5436 gen_set_nzcv(tcg_flags);
5438 tcg_temp_free_i64(tcg_flags);
5441 /* Floating point compare
5442 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
5443 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5444 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
5445 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5447 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5449 unsigned int mos, type, rm, op, rn, opc, op2r;
5450 int size;
5452 mos = extract32(insn, 29, 3);
5453 type = extract32(insn, 22, 2);
5454 rm = extract32(insn, 16, 5);
5455 op = extract32(insn, 14, 2);
5456 rn = extract32(insn, 5, 5);
5457 opc = extract32(insn, 3, 2);
5458 op2r = extract32(insn, 0, 3);
5460 if (mos || op || op2r) {
5461 unallocated_encoding(s);
5462 return;
5465 switch (type) {
5466 case 0:
5467 size = MO_32;
5468 break;
5469 case 1:
5470 size = MO_64;
5471 break;
5472 case 3:
5473 size = MO_16;
5474 if (dc_isar_feature(aa64_fp16, s)) {
5475 break;
5477 /* fallthru */
5478 default:
5479 unallocated_encoding(s);
5480 return;
5483 if (!fp_access_check(s)) {
5484 return;
5487 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5490 /* Floating point conditional compare
5491 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
5492 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5493 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
5494 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5496 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5498 unsigned int mos, type, rm, cond, rn, op, nzcv;
5499 TCGv_i64 tcg_flags;
5500 TCGLabel *label_continue = NULL;
5501 int size;
5503 mos = extract32(insn, 29, 3);
5504 type = extract32(insn, 22, 2);
5505 rm = extract32(insn, 16, 5);
5506 cond = extract32(insn, 12, 4);
5507 rn = extract32(insn, 5, 5);
5508 op = extract32(insn, 4, 1);
5509 nzcv = extract32(insn, 0, 4);
5511 if (mos) {
5512 unallocated_encoding(s);
5513 return;
5516 switch (type) {
5517 case 0:
5518 size = MO_32;
5519 break;
5520 case 1:
5521 size = MO_64;
5522 break;
5523 case 3:
5524 size = MO_16;
5525 if (dc_isar_feature(aa64_fp16, s)) {
5526 break;
5528 /* fallthru */
5529 default:
5530 unallocated_encoding(s);
5531 return;
5534 if (!fp_access_check(s)) {
5535 return;
5538 if (cond < 0x0e) { /* not always */
5539 TCGLabel *label_match = gen_new_label();
5540 label_continue = gen_new_label();
5541 arm_gen_test_cc(cond, label_match);
5542 /* nomatch: */
5543 tcg_flags = tcg_const_i64(nzcv << 28);
5544 gen_set_nzcv(tcg_flags);
5545 tcg_temp_free_i64(tcg_flags);
5546 tcg_gen_br(label_continue);
5547 gen_set_label(label_match);
5550 handle_fp_compare(s, size, rn, rm, false, op);
5552 if (cond < 0x0e) {
5553 gen_set_label(label_continue);
5557 /* Floating point conditional select
5558 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
5559 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5560 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
5561 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5563 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5565 unsigned int mos, type, rm, cond, rn, rd;
5566 TCGv_i64 t_true, t_false, t_zero;
5567 DisasCompare64 c;
5568 MemOp sz;
5570 mos = extract32(insn, 29, 3);
5571 type = extract32(insn, 22, 2);
5572 rm = extract32(insn, 16, 5);
5573 cond = extract32(insn, 12, 4);
5574 rn = extract32(insn, 5, 5);
5575 rd = extract32(insn, 0, 5);
5577 if (mos) {
5578 unallocated_encoding(s);
5579 return;
5582 switch (type) {
5583 case 0:
5584 sz = MO_32;
5585 break;
5586 case 1:
5587 sz = MO_64;
5588 break;
5589 case 3:
5590 sz = MO_16;
5591 if (dc_isar_feature(aa64_fp16, s)) {
5592 break;
5594 /* fallthru */
5595 default:
5596 unallocated_encoding(s);
5597 return;
5600 if (!fp_access_check(s)) {
5601 return;
5604 /* Zero extend sreg & hreg inputs to 64 bits now. */
5605 t_true = tcg_temp_new_i64();
5606 t_false = tcg_temp_new_i64();
5607 read_vec_element(s, t_true, rn, 0, sz);
5608 read_vec_element(s, t_false, rm, 0, sz);
5610 a64_test_cc(&c, cond);
5611 t_zero = tcg_const_i64(0);
5612 tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
5613 tcg_temp_free_i64(t_zero);
5614 tcg_temp_free_i64(t_false);
5615 a64_free_cc(&c);
5617 /* Note that sregs & hregs write back zeros to the high bits,
5618 and we've already done the zero-extension. */
5619 write_fp_dreg(s, rd, t_true);
5620 tcg_temp_free_i64(t_true);
5623 /* Floating-point data-processing (1 source) - half precision */
5624 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5626 TCGv_ptr fpst = NULL;
5627 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5628 TCGv_i32 tcg_res = tcg_temp_new_i32();
5630 switch (opcode) {
5631 case 0x0: /* FMOV */
5632 tcg_gen_mov_i32(tcg_res, tcg_op);
5633 break;
5634 case 0x1: /* FABS */
5635 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5636 break;
5637 case 0x2: /* FNEG */
5638 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5639 break;
5640 case 0x3: /* FSQRT */
5641 fpst = get_fpstatus_ptr(true);
5642 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5643 break;
5644 case 0x8: /* FRINTN */
5645 case 0x9: /* FRINTP */
5646 case 0xa: /* FRINTM */
5647 case 0xb: /* FRINTZ */
5648 case 0xc: /* FRINTA */
5650 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5651 fpst = get_fpstatus_ptr(true);
5653 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5654 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5656 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5657 tcg_temp_free_i32(tcg_rmode);
5658 break;
5660 case 0xe: /* FRINTX */
5661 fpst = get_fpstatus_ptr(true);
5662 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5663 break;
5664 case 0xf: /* FRINTI */
5665 fpst = get_fpstatus_ptr(true);
5666 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5667 break;
5668 default:
5669 abort();
5672 write_fp_sreg(s, rd, tcg_res);
5674 if (fpst) {
5675 tcg_temp_free_ptr(fpst);
5677 tcg_temp_free_i32(tcg_op);
5678 tcg_temp_free_i32(tcg_res);
5681 /* Floating-point data-processing (1 source) - single precision */
5682 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5684 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5685 TCGv_i32 tcg_op, tcg_res;
5686 TCGv_ptr fpst;
5687 int rmode = -1;
5689 tcg_op = read_fp_sreg(s, rn);
5690 tcg_res = tcg_temp_new_i32();
5692 switch (opcode) {
5693 case 0x0: /* FMOV */
5694 tcg_gen_mov_i32(tcg_res, tcg_op);
5695 goto done;
5696 case 0x1: /* FABS */
5697 gen_helper_vfp_abss(tcg_res, tcg_op);
5698 goto done;
5699 case 0x2: /* FNEG */
5700 gen_helper_vfp_negs(tcg_res, tcg_op);
5701 goto done;
5702 case 0x3: /* FSQRT */
5703 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
5704 goto done;
5705 case 0x8: /* FRINTN */
5706 case 0x9: /* FRINTP */
5707 case 0xa: /* FRINTM */
5708 case 0xb: /* FRINTZ */
5709 case 0xc: /* FRINTA */
5710 rmode = arm_rmode_to_sf(opcode & 7);
5711 gen_fpst = gen_helper_rints;
5712 break;
5713 case 0xe: /* FRINTX */
5714 gen_fpst = gen_helper_rints_exact;
5715 break;
5716 case 0xf: /* FRINTI */
5717 gen_fpst = gen_helper_rints;
5718 break;
5719 case 0x10: /* FRINT32Z */
5720 rmode = float_round_to_zero;
5721 gen_fpst = gen_helper_frint32_s;
5722 break;
5723 case 0x11: /* FRINT32X */
5724 gen_fpst = gen_helper_frint32_s;
5725 break;
5726 case 0x12: /* FRINT64Z */
5727 rmode = float_round_to_zero;
5728 gen_fpst = gen_helper_frint64_s;
5729 break;
5730 case 0x13: /* FRINT64X */
5731 gen_fpst = gen_helper_frint64_s;
5732 break;
5733 default:
5734 g_assert_not_reached();
5737 fpst = get_fpstatus_ptr(false);
5738 if (rmode >= 0) {
5739 TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5740 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5741 gen_fpst(tcg_res, tcg_op, fpst);
5742 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5743 tcg_temp_free_i32(tcg_rmode);
5744 } else {
5745 gen_fpst(tcg_res, tcg_op, fpst);
5747 tcg_temp_free_ptr(fpst);
5749 done:
5750 write_fp_sreg(s, rd, tcg_res);
5751 tcg_temp_free_i32(tcg_op);
5752 tcg_temp_free_i32(tcg_res);
5755 /* Floating-point data-processing (1 source) - double precision */
5756 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5758 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
5759 TCGv_i64 tcg_op, tcg_res;
5760 TCGv_ptr fpst;
5761 int rmode = -1;
5763 switch (opcode) {
5764 case 0x0: /* FMOV */
5765 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5766 return;
5769 tcg_op = read_fp_dreg(s, rn);
5770 tcg_res = tcg_temp_new_i64();
5772 switch (opcode) {
5773 case 0x1: /* FABS */
5774 gen_helper_vfp_absd(tcg_res, tcg_op);
5775 goto done;
5776 case 0x2: /* FNEG */
5777 gen_helper_vfp_negd(tcg_res, tcg_op);
5778 goto done;
5779 case 0x3: /* FSQRT */
5780 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5781 goto done;
5782 case 0x8: /* FRINTN */
5783 case 0x9: /* FRINTP */
5784 case 0xa: /* FRINTM */
5785 case 0xb: /* FRINTZ */
5786 case 0xc: /* FRINTA */
5787 rmode = arm_rmode_to_sf(opcode & 7);
5788 gen_fpst = gen_helper_rintd;
5789 break;
5790 case 0xe: /* FRINTX */
5791 gen_fpst = gen_helper_rintd_exact;
5792 break;
5793 case 0xf: /* FRINTI */
5794 gen_fpst = gen_helper_rintd;
5795 break;
5796 case 0x10: /* FRINT32Z */
5797 rmode = float_round_to_zero;
5798 gen_fpst = gen_helper_frint32_d;
5799 break;
5800 case 0x11: /* FRINT32X */
5801 gen_fpst = gen_helper_frint32_d;
5802 break;
5803 case 0x12: /* FRINT64Z */
5804 rmode = float_round_to_zero;
5805 gen_fpst = gen_helper_frint64_d;
5806 break;
5807 case 0x13: /* FRINT64X */
5808 gen_fpst = gen_helper_frint64_d;
5809 break;
5810 default:
5811 g_assert_not_reached();
5814 fpst = get_fpstatus_ptr(false);
5815 if (rmode >= 0) {
5816 TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5817 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5818 gen_fpst(tcg_res, tcg_op, fpst);
5819 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5820 tcg_temp_free_i32(tcg_rmode);
5821 } else {
5822 gen_fpst(tcg_res, tcg_op, fpst);
5824 tcg_temp_free_ptr(fpst);
5826 done:
5827 write_fp_dreg(s, rd, tcg_res);
5828 tcg_temp_free_i64(tcg_op);
5829 tcg_temp_free_i64(tcg_res);
5832 static void handle_fp_fcvt(DisasContext *s, int opcode,
5833 int rd, int rn, int dtype, int ntype)
5835 switch (ntype) {
5836 case 0x0:
5838 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5839 if (dtype == 1) {
5840 /* Single to double */
5841 TCGv_i64 tcg_rd = tcg_temp_new_i64();
5842 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5843 write_fp_dreg(s, rd, tcg_rd);
5844 tcg_temp_free_i64(tcg_rd);
5845 } else {
5846 /* Single to half */
5847 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5848 TCGv_i32 ahp = get_ahp_flag();
5849 TCGv_ptr fpst = get_fpstatus_ptr(false);
5851 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5852 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5853 write_fp_sreg(s, rd, tcg_rd);
5854 tcg_temp_free_i32(tcg_rd);
5855 tcg_temp_free_i32(ahp);
5856 tcg_temp_free_ptr(fpst);
5858 tcg_temp_free_i32(tcg_rn);
5859 break;
5861 case 0x1:
5863 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5864 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5865 if (dtype == 0) {
5866 /* Double to single */
5867 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5868 } else {
5869 TCGv_ptr fpst = get_fpstatus_ptr(false);
5870 TCGv_i32 ahp = get_ahp_flag();
5871 /* Double to half */
5872 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5873 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5874 tcg_temp_free_ptr(fpst);
5875 tcg_temp_free_i32(ahp);
5877 write_fp_sreg(s, rd, tcg_rd);
5878 tcg_temp_free_i32(tcg_rd);
5879 tcg_temp_free_i64(tcg_rn);
5880 break;
5882 case 0x3:
5884 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5885 TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5886 TCGv_i32 tcg_ahp = get_ahp_flag();
5887 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5888 if (dtype == 0) {
5889 /* Half to single */
5890 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5891 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5892 write_fp_sreg(s, rd, tcg_rd);
5893 tcg_temp_free_i32(tcg_rd);
5894 } else {
5895 /* Half to double */
5896 TCGv_i64 tcg_rd = tcg_temp_new_i64();
5897 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5898 write_fp_dreg(s, rd, tcg_rd);
5899 tcg_temp_free_i64(tcg_rd);
5901 tcg_temp_free_i32(tcg_rn);
5902 tcg_temp_free_ptr(tcg_fpst);
5903 tcg_temp_free_i32(tcg_ahp);
5904 break;
5906 default:
5907 abort();
5911 /* Floating point data-processing (1 source)
5912 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
5913 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5914 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
5915 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5917 static void disas_fp_1src(DisasContext *s, uint32_t insn)
5919 int mos = extract32(insn, 29, 3);
5920 int type = extract32(insn, 22, 2);
5921 int opcode = extract32(insn, 15, 6);
5922 int rn = extract32(insn, 5, 5);
5923 int rd = extract32(insn, 0, 5);
5925 if (mos) {
5926 unallocated_encoding(s);
5927 return;
5930 switch (opcode) {
5931 case 0x4: case 0x5: case 0x7:
5933 /* FCVT between half, single and double precision */
5934 int dtype = extract32(opcode, 0, 2);
5935 if (type == 2 || dtype == type) {
5936 unallocated_encoding(s);
5937 return;
5939 if (!fp_access_check(s)) {
5940 return;
5943 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5944 break;
5947 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
5948 if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
5949 unallocated_encoding(s);
5950 return;
5952 /* fall through */
5953 case 0x0 ... 0x3:
5954 case 0x8 ... 0xc:
5955 case 0xe ... 0xf:
5956 /* 32-to-32 and 64-to-64 ops */
5957 switch (type) {
5958 case 0:
5959 if (!fp_access_check(s)) {
5960 return;
5962 handle_fp_1src_single(s, opcode, rd, rn);
5963 break;
5964 case 1:
5965 if (!fp_access_check(s)) {
5966 return;
5968 handle_fp_1src_double(s, opcode, rd, rn);
5969 break;
5970 case 3:
5971 if (!dc_isar_feature(aa64_fp16, s)) {
5972 unallocated_encoding(s);
5973 return;
5976 if (!fp_access_check(s)) {
5977 return;
5979 handle_fp_1src_half(s, opcode, rd, rn);
5980 break;
5981 default:
5982 unallocated_encoding(s);
5984 break;
5986 default:
5987 unallocated_encoding(s);
5988 break;
5992 /* Floating-point data-processing (2 source) - single precision */
5993 static void handle_fp_2src_single(DisasContext *s, int opcode,
5994 int rd, int rn, int rm)
5996 TCGv_i32 tcg_op1;
5997 TCGv_i32 tcg_op2;
5998 TCGv_i32 tcg_res;
5999 TCGv_ptr fpst;
6001 tcg_res = tcg_temp_new_i32();
6002 fpst = get_fpstatus_ptr(false);
6003 tcg_op1 = read_fp_sreg(s, rn);
6004 tcg_op2 = read_fp_sreg(s, rm);
6006 switch (opcode) {
6007 case 0x0: /* FMUL */
6008 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6009 break;
6010 case 0x1: /* FDIV */
6011 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6012 break;
6013 case 0x2: /* FADD */
6014 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6015 break;
6016 case 0x3: /* FSUB */
6017 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6018 break;
6019 case 0x4: /* FMAX */
6020 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6021 break;
6022 case 0x5: /* FMIN */
6023 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6024 break;
6025 case 0x6: /* FMAXNM */
6026 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6027 break;
6028 case 0x7: /* FMINNM */
6029 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6030 break;
6031 case 0x8: /* FNMUL */
6032 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6033 gen_helper_vfp_negs(tcg_res, tcg_res);
6034 break;
6037 write_fp_sreg(s, rd, tcg_res);
6039 tcg_temp_free_ptr(fpst);
6040 tcg_temp_free_i32(tcg_op1);
6041 tcg_temp_free_i32(tcg_op2);
6042 tcg_temp_free_i32(tcg_res);
6045 /* Floating-point data-processing (2 source) - double precision */
6046 static void handle_fp_2src_double(DisasContext *s, int opcode,
6047 int rd, int rn, int rm)
6049 TCGv_i64 tcg_op1;
6050 TCGv_i64 tcg_op2;
6051 TCGv_i64 tcg_res;
6052 TCGv_ptr fpst;
6054 tcg_res = tcg_temp_new_i64();
6055 fpst = get_fpstatus_ptr(false);
6056 tcg_op1 = read_fp_dreg(s, rn);
6057 tcg_op2 = read_fp_dreg(s, rm);
6059 switch (opcode) {
6060 case 0x0: /* FMUL */
6061 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6062 break;
6063 case 0x1: /* FDIV */
6064 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6065 break;
6066 case 0x2: /* FADD */
6067 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6068 break;
6069 case 0x3: /* FSUB */
6070 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6071 break;
6072 case 0x4: /* FMAX */
6073 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6074 break;
6075 case 0x5: /* FMIN */
6076 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6077 break;
6078 case 0x6: /* FMAXNM */
6079 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6080 break;
6081 case 0x7: /* FMINNM */
6082 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6083 break;
6084 case 0x8: /* FNMUL */
6085 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6086 gen_helper_vfp_negd(tcg_res, tcg_res);
6087 break;
6090 write_fp_dreg(s, rd, tcg_res);
6092 tcg_temp_free_ptr(fpst);
6093 tcg_temp_free_i64(tcg_op1);
6094 tcg_temp_free_i64(tcg_op2);
6095 tcg_temp_free_i64(tcg_res);
6098 /* Floating-point data-processing (2 source) - half precision */
6099 static void handle_fp_2src_half(DisasContext *s, int opcode,
6100 int rd, int rn, int rm)
6102 TCGv_i32 tcg_op1;
6103 TCGv_i32 tcg_op2;
6104 TCGv_i32 tcg_res;
6105 TCGv_ptr fpst;
6107 tcg_res = tcg_temp_new_i32();
6108 fpst = get_fpstatus_ptr(true);
6109 tcg_op1 = read_fp_hreg(s, rn);
6110 tcg_op2 = read_fp_hreg(s, rm);
6112 switch (opcode) {
6113 case 0x0: /* FMUL */
6114 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6115 break;
6116 case 0x1: /* FDIV */
6117 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6118 break;
6119 case 0x2: /* FADD */
6120 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6121 break;
6122 case 0x3: /* FSUB */
6123 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6124 break;
6125 case 0x4: /* FMAX */
6126 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6127 break;
6128 case 0x5: /* FMIN */
6129 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6130 break;
6131 case 0x6: /* FMAXNM */
6132 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6133 break;
6134 case 0x7: /* FMINNM */
6135 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6136 break;
6137 case 0x8: /* FNMUL */
6138 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6139 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6140 break;
6141 default:
6142 g_assert_not_reached();
6145 write_fp_sreg(s, rd, tcg_res);
6147 tcg_temp_free_ptr(fpst);
6148 tcg_temp_free_i32(tcg_op1);
6149 tcg_temp_free_i32(tcg_op2);
6150 tcg_temp_free_i32(tcg_res);
6153 /* Floating point data-processing (2 source)
6154 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
6155 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6156 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
6157 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6159 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6161 int mos = extract32(insn, 29, 3);
6162 int type = extract32(insn, 22, 2);
6163 int rd = extract32(insn, 0, 5);
6164 int rn = extract32(insn, 5, 5);
6165 int rm = extract32(insn, 16, 5);
6166 int opcode = extract32(insn, 12, 4);
6168 if (opcode > 8 || mos) {
6169 unallocated_encoding(s);
6170 return;
6173 switch (type) {
6174 case 0:
6175 if (!fp_access_check(s)) {
6176 return;
6178 handle_fp_2src_single(s, opcode, rd, rn, rm);
6179 break;
6180 case 1:
6181 if (!fp_access_check(s)) {
6182 return;
6184 handle_fp_2src_double(s, opcode, rd, rn, rm);
6185 break;
6186 case 3:
6187 if (!dc_isar_feature(aa64_fp16, s)) {
6188 unallocated_encoding(s);
6189 return;
6191 if (!fp_access_check(s)) {
6192 return;
6194 handle_fp_2src_half(s, opcode, rd, rn, rm);
6195 break;
6196 default:
6197 unallocated_encoding(s);
6201 /* Floating-point data-processing (3 source) - single precision */
6202 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6203 int rd, int rn, int rm, int ra)
6205 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6206 TCGv_i32 tcg_res = tcg_temp_new_i32();
6207 TCGv_ptr fpst = get_fpstatus_ptr(false);
6209 tcg_op1 = read_fp_sreg(s, rn);
6210 tcg_op2 = read_fp_sreg(s, rm);
6211 tcg_op3 = read_fp_sreg(s, ra);
6213 /* These are fused multiply-add, and must be done as one
6214 * floating point operation with no rounding between the
6215 * multiplication and addition steps.
6216 * NB that doing the negations here as separate steps is
6217 * correct: an input NaN should come out with its sign bit
6218 * flipped if it is a negated input.
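* The (o1, o0) encoding selects between the fused forms FMADD, FMSUB,
* FNMADD and FNMSUB: o1 set negates the addend (Ra), and o0 != o1
* negates the product operand (Rn).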
6220 if (o1 == true) {
6221 gen_helper_vfp_negs(tcg_op3, tcg_op3);
6224 if (o0 != o1) {
6225 gen_helper_vfp_negs(tcg_op1, tcg_op1);
6228 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6230 write_fp_sreg(s, rd, tcg_res);
6232 tcg_temp_free_ptr(fpst);
6233 tcg_temp_free_i32(tcg_op1);
6234 tcg_temp_free_i32(tcg_op2);
6235 tcg_temp_free_i32(tcg_op3);
6236 tcg_temp_free_i32(tcg_res);
6239 /* Floating-point data-processing (3 source) - double precision */
6240 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6241 int rd, int rn, int rm, int ra)
6243 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6244 TCGv_i64 tcg_res = tcg_temp_new_i64();
6245 TCGv_ptr fpst = get_fpstatus_ptr(false);
6247 tcg_op1 = read_fp_dreg(s, rn);
6248 tcg_op2 = read_fp_dreg(s, rm);
6249 tcg_op3 = read_fp_dreg(s, ra);
6251 /* These are fused multiply-add, and must be done as one
6252 * floating point operation with no rounding between the
6253 * multiplication and addition steps.
6254 * NB that doing the negations here as separate steps is
6255 * correct: an input NaN should come out with its sign bit
6256 * flipped if it is a negated input.
6258 if (o1 == true) {
6259 gen_helper_vfp_negd(tcg_op3, tcg_op3);
6262 if (o0 != o1) {
6263 gen_helper_vfp_negd(tcg_op1, tcg_op1);
6266 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6268 write_fp_dreg(s, rd, tcg_res);
6270 tcg_temp_free_ptr(fpst);
6271 tcg_temp_free_i64(tcg_op1);
6272 tcg_temp_free_i64(tcg_op2);
6273 tcg_temp_free_i64(tcg_op3);
6274 tcg_temp_free_i64(tcg_res);
6277 /* Floating-point data-processing (3 source) - half precision */
6278 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6279 int rd, int rn, int rm, int ra)
6281 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6282 TCGv_i32 tcg_res = tcg_temp_new_i32();
6283 TCGv_ptr fpst = get_fpstatus_ptr(true);
6285 tcg_op1 = read_fp_hreg(s, rn);
6286 tcg_op2 = read_fp_hreg(s, rm);
6287 tcg_op3 = read_fp_hreg(s, ra);
6289 /* These are fused multiply-add, and must be done as one
6290 * floating point operation with no rounding between the
6291 * multiplication and addition steps.
6292 * NB that doing the negations here as separate steps is
6293 * correct: an input NaN should come out with its sign bit
6294 * flipped if it is a negated input.
6296 if (o1 == true) {
6297 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6300 if (o0 != o1) {
6301 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6304 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6306 write_fp_sreg(s, rd, tcg_res);
6308 tcg_temp_free_ptr(fpst);
6309 tcg_temp_free_i32(tcg_op1);
6310 tcg_temp_free_i32(tcg_op2);
6311 tcg_temp_free_i32(tcg_op3);
6312 tcg_temp_free_i32(tcg_res);
6315 /* Floating point data-processing (3 source)
6316 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
6317 * +---+---+---+-----------+------+----+------+----+------+------+------+
6318 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
6319 * +---+---+---+-----------+------+----+------+----+------+------+------+
6321 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6323 int mos = extract32(insn, 29, 3);
6324 int type = extract32(insn, 22, 2);
6325 int rd = extract32(insn, 0, 5);
6326 int rn = extract32(insn, 5, 5);
6327 int ra = extract32(insn, 10, 5);
6328 int rm = extract32(insn, 16, 5);
6329 bool o0 = extract32(insn, 15, 1);
6330 bool o1 = extract32(insn, 21, 1);
6332 if (mos) {
6333 unallocated_encoding(s);
6334 return;
6337 switch (type) {
6338 case 0:
6339 if (!fp_access_check(s)) {
6340 return;
6342 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6343 break;
6344 case 1:
6345 if (!fp_access_check(s)) {
6346 return;
6348 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6349 break;
6350 case 3:
6351 if (!dc_isar_feature(aa64_fp16, s)) {
6352 unallocated_encoding(s);
6353 return;
6355 if (!fp_access_check(s)) {
6356 return;
6358 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6359 break;
6360 default:
6361 unallocated_encoding(s);
6365 /* Floating point immediate
6366 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
6367 * +---+---+---+-----------+------+---+------------+-------+------+------+
6368 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
6369 * +---+---+---+-----------+------+---+------------+-------+------+------+
6371 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6373 int rd = extract32(insn, 0, 5);
6374 int imm5 = extract32(insn, 5, 5);
6375 int imm8 = extract32(insn, 13, 8);
6376 int type = extract32(insn, 22, 2);
6377 int mos = extract32(insn, 29, 3);
6378 uint64_t imm;
6379 TCGv_i64 tcg_res;
6380 MemOp sz;
6382 if (mos || imm5) {
6383 unallocated_encoding(s);
6384 return;
6387 switch (type) {
6388 case 0:
6389 sz = MO_32;
6390 break;
6391 case 1:
6392 sz = MO_64;
6393 break;
6394 case 3:
6395 sz = MO_16;
6396 if (dc_isar_feature(aa64_fp16, s)) {
6397 break;
6399 /* fallthru */
6400 default:
6401 unallocated_encoding(s);
6402 return;
6405 if (!fp_access_check(s)) {
6406 return;
6409 imm = vfp_expand_imm(sz, imm8);
6411 tcg_res = tcg_const_i64(imm);
6412 write_fp_dreg(s, rd, tcg_res);
6413 tcg_temp_free_i64(tcg_res);
6416 /* Handle floating point <=> fixed point conversions. Note that we can
6417 * also deal with fp <=> integer conversions as a special case (scale == 64)
6418 * OPTME: consider handling that special case specially or at least skipping
6419 * the call to scalbn in the helpers for zero shifts.
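* The shift passed to the fixed-point helpers below is (64 - scale),
* i.e. the number of fractional bits; scale == 64 therefore gives a
* shift of zero and a plain integer <-> FP conversion.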
6421 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6422 bool itof, int rmode, int scale, int sf, int type)
6424 bool is_signed = !(opcode & 1);
6425 TCGv_ptr tcg_fpstatus;
6426 TCGv_i32 tcg_shift, tcg_single;
6427 TCGv_i64 tcg_double;
6429 tcg_fpstatus = get_fpstatus_ptr(type == 3);
6431 tcg_shift = tcg_const_i32(64 - scale);
6433 if (itof) {
6434 TCGv_i64 tcg_int = cpu_reg(s, rn);
6435 if (!sf) {
6436 TCGv_i64 tcg_extend = new_tmp_a64(s);
6438 if (is_signed) {
6439 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6440 } else {
6441 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6444 tcg_int = tcg_extend;
6447 switch (type) {
6448 case 1: /* float64 */
6449 tcg_double = tcg_temp_new_i64();
6450 if (is_signed) {
6451 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6452 tcg_shift, tcg_fpstatus);
6453 } else {
6454 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6455 tcg_shift, tcg_fpstatus);
6457 write_fp_dreg(s, rd, tcg_double);
6458 tcg_temp_free_i64(tcg_double);
6459 break;
6461 case 0: /* float32 */
6462 tcg_single = tcg_temp_new_i32();
6463 if (is_signed) {
6464 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6465 tcg_shift, tcg_fpstatus);
6466 } else {
6467 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6468 tcg_shift, tcg_fpstatus);
6470 write_fp_sreg(s, rd, tcg_single);
6471 tcg_temp_free_i32(tcg_single);
6472 break;
6474 case 3: /* float16 */
6475 tcg_single = tcg_temp_new_i32();
6476 if (is_signed) {
6477 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6478 tcg_shift, tcg_fpstatus);
6479 } else {
6480 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6481 tcg_shift, tcg_fpstatus);
6483 write_fp_sreg(s, rd, tcg_single);
6484 tcg_temp_free_i32(tcg_single);
6485 break;
6487 default:
6488 g_assert_not_reached();
6490 } else {
6491 TCGv_i64 tcg_int = cpu_reg(s, rd);
6492 TCGv_i32 tcg_rmode;
6494 if (extract32(opcode, 2, 1)) {
6495 /* There are too many rounding modes to all fit into rmode,
6496 * so FCVTA[US] is a special case.
6498 rmode = FPROUNDING_TIEAWAY;
6501 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6503 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6505 switch (type) {
6506 case 1: /* float64 */
6507 tcg_double = read_fp_dreg(s, rn);
6508 if (is_signed) {
6509 if (!sf) {
6510 gen_helper_vfp_tosld(tcg_int, tcg_double,
6511 tcg_shift, tcg_fpstatus);
6512 } else {
6513 gen_helper_vfp_tosqd(tcg_int, tcg_double,
6514 tcg_shift, tcg_fpstatus);
6516 } else {
6517 if (!sf) {
6518 gen_helper_vfp_tould(tcg_int, tcg_double,
6519 tcg_shift, tcg_fpstatus);
6520 } else {
6521 gen_helper_vfp_touqd(tcg_int, tcg_double,
6522 tcg_shift, tcg_fpstatus);
6525 if (!sf) {
6526 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6528 tcg_temp_free_i64(tcg_double);
6529 break;
6531 case 0: /* float32 */
6532 tcg_single = read_fp_sreg(s, rn);
6533 if (sf) {
6534 if (is_signed) {
6535 gen_helper_vfp_tosqs(tcg_int, tcg_single,
6536 tcg_shift, tcg_fpstatus);
6537 } else {
6538 gen_helper_vfp_touqs(tcg_int, tcg_single,
6539 tcg_shift, tcg_fpstatus);
6541 } else {
6542 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6543 if (is_signed) {
6544 gen_helper_vfp_tosls(tcg_dest, tcg_single,
6545 tcg_shift, tcg_fpstatus);
6546 } else {
6547 gen_helper_vfp_touls(tcg_dest, tcg_single,
6548 tcg_shift, tcg_fpstatus);
6550 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6551 tcg_temp_free_i32(tcg_dest);
6553 tcg_temp_free_i32(tcg_single);
6554 break;
6556 case 3: /* float16 */
6557 tcg_single = read_fp_sreg(s, rn);
6558 if (sf) {
6559 if (is_signed) {
6560 gen_helper_vfp_tosqh(tcg_int, tcg_single,
6561 tcg_shift, tcg_fpstatus);
6562 } else {
6563 gen_helper_vfp_touqh(tcg_int, tcg_single,
6564 tcg_shift, tcg_fpstatus);
6566 } else {
6567 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6568 if (is_signed) {
6569 gen_helper_vfp_toslh(tcg_dest, tcg_single,
6570 tcg_shift, tcg_fpstatus);
6571 } else {
6572 gen_helper_vfp_toulh(tcg_dest, tcg_single,
6573 tcg_shift, tcg_fpstatus);
6575 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6576 tcg_temp_free_i32(tcg_dest);
6578 tcg_temp_free_i32(tcg_single);
6579 break;
6581 default:
6582 g_assert_not_reached();
6585 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6586 tcg_temp_free_i32(tcg_rmode);
6589 tcg_temp_free_ptr(tcg_fpstatus);
6590 tcg_temp_free_i32(tcg_shift);
6593 /* Floating point <-> fixed point conversions
6594 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6595 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6596 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
6597 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6599 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6601 int rd = extract32(insn, 0, 5);
6602 int rn = extract32(insn, 5, 5);
6603 int scale = extract32(insn, 10, 6);
6604 int opcode = extract32(insn, 16, 3);
6605 int rmode = extract32(insn, 19, 2);
6606 int type = extract32(insn, 22, 2);
6607 bool sbit = extract32(insn, 29, 1);
6608 bool sf = extract32(insn, 31, 1);
6609 bool itof;
6611 if (sbit || (!sf && scale < 32)) {
6612 unallocated_encoding(s);
6613 return;
6616 switch (type) {
6617 case 0: /* float32 */
6618 case 1: /* float64 */
6619 break;
6620 case 3: /* float16 */
6621 if (dc_isar_feature(aa64_fp16, s)) {
6622 break;
6624 /* fallthru */
6625 default:
6626 unallocated_encoding(s);
6627 return;
6630 switch ((rmode << 3) | opcode) {
6631 case 0x2: /* SCVTF */
6632 case 0x3: /* UCVTF */
6633 itof = true;
6634 break;
6635 case 0x18: /* FCVTZS */
6636 case 0x19: /* FCVTZU */
6637 itof = false;
6638 break;
6639 default:
6640 unallocated_encoding(s);
6641 return;
6644 if (!fp_access_check(s)) {
6645 return;
6648 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6651 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6653 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6654 * without conversion.
6657 if (itof) {
6658 TCGv_i64 tcg_rn = cpu_reg(s, rn);
6659 TCGv_i64 tmp;
6661 switch (type) {
6662 case 0:
6663 /* 32 bit */
6664 tmp = tcg_temp_new_i64();
6665 tcg_gen_ext32u_i64(tmp, tcg_rn);
6666 write_fp_dreg(s, rd, tmp);
6667 tcg_temp_free_i64(tmp);
6668 break;
6669 case 1:
6670 /* 64 bit */
6671 write_fp_dreg(s, rd, tcg_rn);
6672 break;
6673 case 2:
6674 /* 64 bit to top half. */
6675 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6676 clear_vec_high(s, true, rd);
6677 break;
6678 case 3:
6679 /* 16 bit */
6680 tmp = tcg_temp_new_i64();
6681 tcg_gen_ext16u_i64(tmp, tcg_rn);
6682 write_fp_dreg(s, rd, tmp);
6683 tcg_temp_free_i64(tmp);
6684 break;
6685 default:
6686 g_assert_not_reached();
6688 } else {
6689 TCGv_i64 tcg_rd = cpu_reg(s, rd);
6691 switch (type) {
6692 case 0:
6693 /* 32 bit */
6694 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6695 break;
6696 case 1:
6697 /* 64 bit */
6698 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6699 break;
6700 case 2:
6701 /* 64 bits from top half */
6702 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6703 break;
6704 case 3:
6705 /* 16 bit */
6706 tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6707 break;
6708 default:
6709 g_assert_not_reached();
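/* FJCVTZS: convert the double in Vn to a 32-bit signed integer using the
* JavaScript conversion rules. The helper packs the converted result into
* the low 32 bits of 't' and the value for cpu_ZF into the high 32 bits;
* N, C and V are then cleared to zero. */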
6714 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6716 TCGv_i64 t = read_fp_dreg(s, rn);
6717 TCGv_ptr fpstatus = get_fpstatus_ptr(false);
6719 gen_helper_fjcvtzs(t, t, fpstatus);
6721 tcg_temp_free_ptr(fpstatus);
6723 tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6724 tcg_gen_extrh_i64_i32(cpu_ZF, t);
6725 tcg_gen_movi_i32(cpu_CF, 0);
6726 tcg_gen_movi_i32(cpu_NF, 0);
6727 tcg_gen_movi_i32(cpu_VF, 0);
6729 tcg_temp_free_i64(t);
6732 /* Floating point <-> integer conversions
6733 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6734 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6735 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6736 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6738 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6740 int rd = extract32(insn, 0, 5);
6741 int rn = extract32(insn, 5, 5);
6742 int opcode = extract32(insn, 16, 3);
6743 int rmode = extract32(insn, 19, 2);
6744 int type = extract32(insn, 22, 2);
6745 bool sbit = extract32(insn, 29, 1);
6746 bool sf = extract32(insn, 31, 1);
6747 bool itof = false;
6749 if (sbit) {
6750 goto do_unallocated;
6753 switch (opcode) {
6754 case 2: /* SCVTF */
6755 case 3: /* UCVTF */
6756 itof = true;
6757 /* fallthru */
6758 case 4: /* FCVTAS */
6759 case 5: /* FCVTAU */
6760 if (rmode != 0) {
6761 goto do_unallocated;
6763 /* fallthru */
6764 case 0: /* FCVT[NPMZ]S */
6765 case 1: /* FCVT[NPMZ]U */
6766 switch (type) {
6767 case 0: /* float32 */
6768 case 1: /* float64 */
6769 break;
6770 case 3: /* float16 */
6771 if (!dc_isar_feature(aa64_fp16, s)) {
6772 goto do_unallocated;
6774 break;
6775 default:
6776 goto do_unallocated;
6778 if (!fp_access_check(s)) {
6779 return;
6781 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6782 break;
6784 default:
6785 switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
6786 case 0b01100110: /* FMOV half <-> 32-bit int */
6787 case 0b01100111:
6788 case 0b11100110: /* FMOV half <-> 64-bit int */
6789 case 0b11100111:
6790 if (!dc_isar_feature(aa64_fp16, s)) {
6791 goto do_unallocated;
6793 /* fallthru */
6794 case 0b00000110: /* FMOV 32-bit */
6795 case 0b00000111:
6796 case 0b10100110: /* FMOV 64-bit */
6797 case 0b10100111:
6798 case 0b11001110: /* FMOV top half of 128-bit */
6799 case 0b11001111:
6800 if (!fp_access_check(s)) {
6801 return;
6803 itof = opcode & 1;
6804 handle_fmov(s, rd, rn, type, itof);
6805 break;
6807 case 0b00111110: /* FJCVTZS */
6808 if (!dc_isar_feature(aa64_jscvt, s)) {
6809 goto do_unallocated;
6810 } else if (fp_access_check(s)) {
6811 handle_fjcvtzs(s, rd, rn);
6813 break;
6815 default:
6816 do_unallocated:
6817 unallocated_encoding(s);
6818 return;
6820 break;
6824 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6825 * 31 30 29 28 25 24 0
6826 * +---+---+---+---------+-----------------------------+
6827 * | | 0 | | 1 1 1 1 | |
6828 * +---+---+---+---------+-----------------------------+
6830 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6832 if (extract32(insn, 24, 1)) {
6833 /* Floating point data-processing (3 source) */
6834 disas_fp_3src(s, insn);
6835 } else if (extract32(insn, 21, 1) == 0) {
6836 /* Floating point to fixed point conversions */
6837 disas_fp_fixed_conv(s, insn);
6838 } else {
6839 switch (extract32(insn, 10, 2)) {
6840 case 1:
6841 /* Floating point conditional compare */
6842 disas_fp_ccomp(s, insn);
6843 break;
6844 case 2:
6845 /* Floating point data-processing (2 source) */
6846 disas_fp_2src(s, insn);
6847 break;
6848 case 3:
6849 /* Floating point conditional select */
6850 disas_fp_csel(s, insn);
6851 break;
6852 case 0:
6853 switch (ctz32(extract32(insn, 12, 4))) {
6854 case 0: /* [15:12] == xxx1 */
6855 /* Floating point immediate */
6856 disas_fp_imm(s, insn);
6857 break;
6858 case 1: /* [15:12] == xx10 */
6859 /* Floating point compare */
6860 disas_fp_compare(s, insn);
6861 break;
6862 case 2: /* [15:12] == x100 */
6863 /* Floating point data-processing (1 source) */
6864 disas_fp_1src(s, insn);
6865 break;
6866 case 3: /* [15:12] == 1000 */
6867 unallocated_encoding(s);
6868 break;
6869 default: /* [15:12] == 0000 */
6870 /* Floating point <-> integer conversions */
6871 disas_fp_int_conv(s, insn);
6872 break;
6874 break;
6879 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6880 int pos)
6882 /* Extract 64 bits from the middle of two concatenated 64 bit
6883 * vector register slices left:right. The extracted bits start
6884 * at 'pos' bits into the right (least significant) side.
6885 * We return the result in tcg_right, and guarantee not to
6886 * trash tcg_left.
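* For example, pos == 8 produces left<7:0>:right<63:8>.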
6888 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6889 assert(pos > 0 && pos < 64);
6891 tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6892 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6893 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6895 tcg_temp_free_i64(tcg_tmp);
6898 /* EXT
6899 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
6900 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6901 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
6902 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6904 static void disas_simd_ext(DisasContext *s, uint32_t insn)
6906 int is_q = extract32(insn, 30, 1);
6907 int op2 = extract32(insn, 22, 2);
6908 int imm4 = extract32(insn, 11, 4);
6909 int rm = extract32(insn, 16, 5);
6910 int rn = extract32(insn, 5, 5);
6911 int rd = extract32(insn, 0, 5);
6912 int pos = imm4 << 3;
6913 TCGv_i64 tcg_resl, tcg_resh;
6915 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6916 unallocated_encoding(s);
6917 return;
6920 if (!fp_access_check(s)) {
6921 return;
6924 tcg_resh = tcg_temp_new_i64();
6925 tcg_resl = tcg_temp_new_i64();
6927 /* Vd gets bits starting at pos bits into Vm:Vn. This is
6928 * either extracting 128 bits from a 128:128 concatenation, or
6929 * extracting 64 bits from a 64:64 concatenation.
6931 if (!is_q) {
6932 read_vec_element(s, tcg_resl, rn, 0, MO_64);
6933 if (pos != 0) {
6934 read_vec_element(s, tcg_resh, rm, 0, MO_64);
6935 do_ext64(s, tcg_resh, tcg_resl, pos);
6937 } else {
6938 TCGv_i64 tcg_hh;
6939 typedef struct {
6940 int reg;
6941 int elt;
6942 } EltPosns;
6943 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6944 EltPosns *elt = eltposns;
6946 if (pos >= 64) {
6947 elt++;
6948 pos -= 64;
6951 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6952 elt++;
6953 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6954 elt++;
6955 if (pos != 0) {
6956 do_ext64(s, tcg_resh, tcg_resl, pos);
6957 tcg_hh = tcg_temp_new_i64();
6958 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6959 do_ext64(s, tcg_hh, tcg_resh, pos);
6960 tcg_temp_free_i64(tcg_hh);
6964 write_vec_element(s, tcg_resl, rd, 0, MO_64);
6965 tcg_temp_free_i64(tcg_resl);
6966 if (is_q) {
6967 write_vec_element(s, tcg_resh, rd, 1, MO_64);
6969 tcg_temp_free_i64(tcg_resh);
6970 clear_vec_high(s, is_q, rd);
6973 /* TBL/TBX
6974 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
6975 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6976 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
6977 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6979 static void disas_simd_tb(DisasContext *s, uint32_t insn)
6981 int op2 = extract32(insn, 22, 2);
6982 int is_q = extract32(insn, 30, 1);
6983 int rm = extract32(insn, 16, 5);
6984 int rn = extract32(insn, 5, 5);
6985 int rd = extract32(insn, 0, 5);
6986 int is_tblx = extract32(insn, 12, 1);
6987 int len = extract32(insn, 13, 2);
6988 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6989 TCGv_i32 tcg_regno, tcg_numregs;
6991 if (op2 != 0) {
6992 unallocated_encoding(s);
6993 return;
6996 if (!fp_access_check(s)) {
6997 return;
7000 /* This does a table lookup: for every byte element in the input
7001 * we index into a table formed from up to four vector registers,
7002 * and then the output is the result of the lookups. Our helper
7003 * function does the lookup operation for a single 64 bit part of
7004 * the input.
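* Out-of-range indices produce a zero byte for TBL but leave the
* corresponding destination byte unchanged for TBX, which is why the
* result accumulators below start as zero for TBL and as the old Rd
* value for TBX.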
7006 tcg_resl = tcg_temp_new_i64();
7007 tcg_resh = NULL;
7009 if (is_tblx) {
7010 read_vec_element(s, tcg_resl, rd, 0, MO_64);
7011 } else {
7012 tcg_gen_movi_i64(tcg_resl, 0);
7015 if (is_q) {
7016 tcg_resh = tcg_temp_new_i64();
7017 if (is_tblx) {
7018 read_vec_element(s, tcg_resh, rd, 1, MO_64);
7019 } else {
7020 tcg_gen_movi_i64(tcg_resh, 0);
7024 tcg_idx = tcg_temp_new_i64();
7025 tcg_regno = tcg_const_i32(rn);
7026 tcg_numregs = tcg_const_i32(len + 1);
7027 read_vec_element(s, tcg_idx, rm, 0, MO_64);
7028 gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
7029 tcg_regno, tcg_numregs);
7030 if (is_q) {
7031 read_vec_element(s, tcg_idx, rm, 1, MO_64);
7032 gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
7033 tcg_regno, tcg_numregs);
7035 tcg_temp_free_i64(tcg_idx);
7036 tcg_temp_free_i32(tcg_regno);
7037 tcg_temp_free_i32(tcg_numregs);
7039 write_vec_element(s, tcg_resl, rd, 0, MO_64);
7040 tcg_temp_free_i64(tcg_resl);
7042 if (is_q) {
7043 write_vec_element(s, tcg_resh, rd, 1, MO_64);
7044 tcg_temp_free_i64(tcg_resh);
7046 clear_vec_high(s, is_q, rd);
7049 /* ZIP/UZP/TRN
7050 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
7051 * +---+---+-------------+------+---+------+---+------------------+------+
7052 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
7053 * +---+---+-------------+------+---+------+---+------------------+------+
7055 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7057 int rd = extract32(insn, 0, 5);
7058 int rn = extract32(insn, 5, 5);
7059 int rm = extract32(insn, 16, 5);
7060 int size = extract32(insn, 22, 2);
7061 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7062 * bit 2 indicates 1 vs 2 variant of the insn.
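* For example, UZP1 concatenates the even-numbered elements of Rn and Rm,
* while ZIP1 interleaves the elements of the low halves of Rn and Rm.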
7064 int opcode = extract32(insn, 12, 2);
7065 bool part = extract32(insn, 14, 1);
7066 bool is_q = extract32(insn, 30, 1);
7067 int esize = 8 << size;
7068 int i, ofs;
7069 int datasize = is_q ? 128 : 64;
7070 int elements = datasize / esize;
7071 TCGv_i64 tcg_res, tcg_resl, tcg_resh;
7073 if (opcode == 0 || (size == 3 && !is_q)) {
7074 unallocated_encoding(s);
7075 return;
7078 if (!fp_access_check(s)) {
7079 return;
7082 tcg_resl = tcg_const_i64(0);
7083 tcg_resh = is_q ? tcg_const_i64(0) : NULL;
7084 tcg_res = tcg_temp_new_i64();
7086 for (i = 0; i < elements; i++) {
7087 switch (opcode) {
7088 case 1: /* UZP1/2 */
7090 int midpoint = elements / 2;
7091 if (i < midpoint) {
7092 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
7093 } else {
7094 read_vec_element(s, tcg_res, rm,
7095 2 * (i - midpoint) + part, size);
7097 break;
7099 case 2: /* TRN1/2 */
7100 if (i & 1) {
7101 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
7102 } else {
7103 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
7105 break;
7106 case 3: /* ZIP1/2 */
7108 int base = part * elements / 2;
7109 if (i & 1) {
7110 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
7111 } else {
7112 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
7114 break;
7116 default:
7117 g_assert_not_reached();
7120 ofs = i * esize;
7121 if (ofs < 64) {
7122 tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
7123 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
7124 } else {
7125 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
7126 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
7130 tcg_temp_free_i64(tcg_res);
7132 write_vec_element(s, tcg_resl, rd, 0, MO_64);
7133 tcg_temp_free_i64(tcg_resl);
7135 if (is_q) {
7136 write_vec_element(s, tcg_resh, rd, 1, MO_64);
7137 tcg_temp_free_i64(tcg_resh);
7139 clear_vec_high(s, is_q, rd);
7143 * do_reduction_op helper
7145 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7146 * important for correct NaN propagation that we do these
7147 * operations in exactly the order specified by the pseudocode.
7149 * This is a recursive function; TCG temps should be freed by the
7150 * calling function once it is done with the values.
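* vmap is a bitmap of the element indices still to be reduced; each
* recursion step splits it into a low and a high half, so e.g. a
* four-element reduction computes op(op(e0, e1), op(e2, e3)) rather
* than a linear left-to-right fold.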
7152 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7153 int esize, int size, int vmap, TCGv_ptr fpst)
7155 if (esize == size) {
7156 int element;
7157 MemOp msize = esize == 16 ? MO_16 : MO_32;
7158 TCGv_i32 tcg_elem;
7160 /* We should have one register left here */
7161 assert(ctpop8(vmap) == 1);
7162 element = ctz32(vmap);
7163 assert(element < 8);
7165 tcg_elem = tcg_temp_new_i32();
7166 read_vec_element_i32(s, tcg_elem, rn, element, msize);
7167 return tcg_elem;
7168 } else {
7169 int bits = size / 2;
7170 int shift = ctpop8(vmap) / 2;
7171 int vmap_lo = (vmap >> shift) & vmap;
7172 int vmap_hi = (vmap & ~vmap_lo);
7173 TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7175 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7176 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7177 tcg_res = tcg_temp_new_i32();
7179 switch (fpopcode) {
7180 case 0x0c: /* fmaxnmv half-precision */
7181 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7182 break;
7183 case 0x0f: /* fmaxv half-precision */
7184 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7185 break;
7186 case 0x1c: /* fminnmv half-precision */
7187 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7188 break;
7189 case 0x1f: /* fminv half-precision */
7190 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7191 break;
7192 case 0x2c: /* fmaxnmv */
7193 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7194 break;
7195 case 0x2f: /* fmaxv */
7196 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7197 break;
7198 case 0x3c: /* fminnmv */
7199 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7200 break;
7201 case 0x3f: /* fminv */
7202 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7203 break;
7204 default:
7205 g_assert_not_reached();
7208 tcg_temp_free_i32(tcg_hi);
7209 tcg_temp_free_i32(tcg_lo);
7210 return tcg_res;
7214 /* AdvSIMD across lanes
7215 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7216 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7217 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7218 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7220 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7222 int rd = extract32(insn, 0, 5);
7223 int rn = extract32(insn, 5, 5);
7224 int size = extract32(insn, 22, 2);
7225 int opcode = extract32(insn, 12, 5);
7226 bool is_q = extract32(insn, 30, 1);
7227 bool is_u = extract32(insn, 29, 1);
7228 bool is_fp = false;
7229 bool is_min = false;
7230 int esize;
7231 int elements;
7232 int i;
7233 TCGv_i64 tcg_res, tcg_elt;
7235 switch (opcode) {
7236 case 0x1b: /* ADDV */
7237 if (is_u) {
7238 unallocated_encoding(s);
7239 return;
7241 /* fall through */
7242 case 0x3: /* SADDLV, UADDLV */
7243 case 0xa: /* SMAXV, UMAXV */
7244 case 0x1a: /* SMINV, UMINV */
7245 if (size == 3 || (size == 2 && !is_q)) {
7246 unallocated_encoding(s);
7247 return;
7249 break;
7250 case 0xc: /* FMAXNMV, FMINNMV */
7251 case 0xf: /* FMAXV, FMINV */
7252 /* Bit 1 of size field encodes min vs max and the actual size
7253 * depends on the encoding of the U bit. If U is not set (and FP16
7254 * enabled) then we do half-precision float instead of single
7255 * precision.
7257 is_min = extract32(size, 1, 1);
7258 is_fp = true;
7259 if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7260 size = 1;
7261 } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7262 unallocated_encoding(s);
7263 return;
7264 } else {
7265 size = 2;
7267 break;
7268 default:
7269 unallocated_encoding(s);
7270 return;
7273 if (!fp_access_check(s)) {
7274 return;
7277 esize = 8 << size;
7278 elements = (is_q ? 128 : 64) / esize;
7280 tcg_res = tcg_temp_new_i64();
7281 tcg_elt = tcg_temp_new_i64();
7283 /* These instructions operate across all lanes of a vector
7284 * to produce a single result. We can guarantee that a 64
7285 * bit intermediate is sufficient:
7286 * + for [US]ADDLV the maximum element size is 32 bits, and
7287 * the result type is 64 bits
7288 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7289 * same as the element size, which is 32 bits at most
7290 * For the integer operations we can choose to work at 64
7291 * or 32 bits and truncate at the end; for simplicity
7292 * we use 64 bits always. The floating point
7293 * ops do require 32 bit intermediates, though.
7295 if (!is_fp) {
7296 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7298 for (i = 1; i < elements; i++) {
7299 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7301 switch (opcode) {
7302 case 0x03: /* SADDLV / UADDLV */
7303 case 0x1b: /* ADDV */
7304 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7305 break;
7306 case 0x0a: /* SMAXV / UMAXV */
7307 if (is_u) {
7308 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7309 } else {
7310 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7312 break;
7313 case 0x1a: /* SMINV / UMINV */
7314 if (is_u) {
7315 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7316 } else {
7317 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7319 break;
7320 default:
7321 g_assert_not_reached();
7325 } else {
7326 /* Floating point vector reduction ops which work across 32
7327 * bit (single) or 16 bit (half-precision) intermediates.
7328 * Note that correct NaN propagation requires that we do these
7329 * operations in exactly the order specified by the pseudocode.
7331 TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
7332 int fpopcode = opcode | is_min << 4 | is_u << 5;
7333 int vmap = (1 << elements) - 1;
7334 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7335 (is_q ? 128 : 64), vmap, fpst);
7336 tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7337 tcg_temp_free_i32(tcg_res32);
7338 tcg_temp_free_ptr(fpst);
7341 tcg_temp_free_i64(tcg_elt);
7343 /* Now truncate the result to the width required for the final output */
7344 if (opcode == 0x03) {
7345 /* SADDLV, UADDLV: result is 2*esize */
7346 size++;
7349 switch (size) {
7350 case 0:
7351 tcg_gen_ext8u_i64(tcg_res, tcg_res);
7352 break;
7353 case 1:
7354 tcg_gen_ext16u_i64(tcg_res, tcg_res);
7355 break;
7356 case 2:
7357 tcg_gen_ext32u_i64(tcg_res, tcg_res);
7358 break;
7359 case 3:
7360 break;
7361 default:
7362 g_assert_not_reached();
7365 write_fp_dreg(s, rd, tcg_res);
7366 tcg_temp_free_i64(tcg_res);
7369 /* DUP (Element, Vector)
7371 * 31 30 29 21 20 16 15 10 9 5 4 0
7372 * +---+---+-------------------+--------+-------------+------+------+
7373 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7374 * +---+---+-------------------+--------+-------------+------+------+
7376 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7378 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7379 int imm5)
7381 int size = ctz32(imm5);
7382 int index;
7384 if (size > 3 || (size == 3 && !is_q)) {
7385 unallocated_encoding(s);
7386 return;
7389 if (!fp_access_check(s)) {
7390 return;
7393 index = imm5 >> (size + 1);
7394 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7395 vec_reg_offset(s, rn, index, size),
7396 is_q ? 16 : 8, vec_full_reg_size(s));
7399 /* DUP (element, scalar)
7400 * 31 21 20 16 15 10 9 5 4 0
7401 * +-----------------------+--------+-------------+------+------+
7402 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7403 * +-----------------------+--------+-------------+------+------+
7405 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7406 int imm5)
7408 int size = ctz32(imm5);
7409 int index;
7410 TCGv_i64 tmp;
7412 if (size > 3) {
7413 unallocated_encoding(s);
7414 return;
7417 if (!fp_access_check(s)) {
7418 return;
7421 index = imm5 >> (size + 1);
7423 /* This instruction just extracts the specified element and
7424 * zero-extends it into the bottom of the destination register.
7426 tmp = tcg_temp_new_i64();
7427 read_vec_element(s, tmp, rn, index, size);
7428 write_fp_dreg(s, rd, tmp);
7429 tcg_temp_free_i64(tmp);
7432 /* DUP (General)
7434 * 31 30 29 21 20 16 15 10 9 5 4 0
7435 * +---+---+-------------------+--------+-------------+------+------+
7436 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
7437 * +---+---+-------------------+--------+-------------+------+------+
7439 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7441 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7442 int imm5)
7444 int size = ctz32(imm5);
7445 uint32_t dofs, oprsz, maxsz;
7447 if (size > 3 || ((size == 3) && !is_q)) {
7448 unallocated_encoding(s);
7449 return;
7452 if (!fp_access_check(s)) {
7453 return;
7456 dofs = vec_full_reg_offset(s, rd);
7457 oprsz = is_q ? 16 : 8;
7458 maxsz = vec_full_reg_size(s);
7460 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7463 /* INS (Element)
7465 * 31 21 20 16 15 14 11 10 9 5 4 0
7466 * +-----------------------+--------+------------+---+------+------+
7467 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7468 * +-----------------------+--------+------------+---+------+------+
7470 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7471 * index: encoded in imm5<4:size+1>
7473 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7474 int imm4, int imm5)
7476 int size = ctz32(imm5);
7477 int src_index, dst_index;
7478 TCGv_i64 tmp;
7480 if (size > 3) {
7481 unallocated_encoding(s);
7482 return;
7485 if (!fp_access_check(s)) {
7486 return;
7489 dst_index = extract32(imm5, 1+size, 5);
7490 src_index = extract32(imm4, size, 4);
7492 tmp = tcg_temp_new_i64();
7494 read_vec_element(s, tmp, rn, src_index, size);
7495 write_vec_element(s, tmp, rd, dst_index, size);
7497 tcg_temp_free_i64(tmp);
7499 /* INS is considered a 128-bit write for SVE. */
7500 clear_vec_high(s, true, rd);
7504 /* INS (General)
7506 * 31 21 20 16 15 10 9 5 4 0
7507 * +-----------------------+--------+-------------+------+------+
7508 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
7509 * +-----------------------+--------+-------------+------+------+
7511 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7512 * index: encoded in imm5<4:size+1>
7514 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7516 int size = ctz32(imm5);
7517 int idx;
7519 if (size > 3) {
7520 unallocated_encoding(s);
7521 return;
7524 if (!fp_access_check(s)) {
7525 return;
7528 idx = extract32(imm5, 1 + size, 4 - size);
7529 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7531 /* INS is considered a 128-bit write for SVE. */
7532 clear_vec_high(s, true, rd);
7536 * UMOV (General)
7537 * SMOV (General)
7539 * 31 30 29 21 20 16 15 12 10 9 5 4 0
7540 * +---+---+-------------------+--------+-------------+------+------+
7541 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
7542 * +---+---+-------------------+--------+-------------+------+------+
7544 * U: unsigned when set
7545 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7547 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7548 int rn, int rd, int imm5)
7550 int size = ctz32(imm5);
7551 int element;
7552 TCGv_i64 tcg_rd;
7554 /* Check for UnallocatedEncodings */
7555 if (is_signed) {
7556 if (size > 2 || (size == 2 && !is_q)) {
7557 unallocated_encoding(s);
7558 return;
7560 } else {
7561 if (size > 3
7562 || (size < 3 && is_q)
7563 || (size == 3 && !is_q)) {
7564 unallocated_encoding(s);
7565 return;
7569 if (!fp_access_check(s)) {
7570 return;
7573 element = extract32(imm5, 1+size, 4);
7575 tcg_rd = cpu_reg(s, rd);
7576 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7577 if (is_signed && !is_q) {
7578 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7582 /* AdvSIMD copy
7583 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7584 * +---+---+----+-----------------+------+---+------+---+------+------+
7585 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7586 * +---+---+----+-----------------+------+---+------+---+------+------+
7588 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7590 int rd = extract32(insn, 0, 5);
7591 int rn = extract32(insn, 5, 5);
7592 int imm4 = extract32(insn, 11, 4);
7593 int op = extract32(insn, 29, 1);
7594 int is_q = extract32(insn, 30, 1);
7595 int imm5 = extract32(insn, 16, 5);
7597 if (op) {
7598 if (is_q) {
7599 /* INS (element) */
7600 handle_simd_inse(s, rd, rn, imm4, imm5);
7601 } else {
7602 unallocated_encoding(s);
7604 } else {
7605 switch (imm4) {
7606 case 0:
7607 /* DUP (element - vector) */
7608 handle_simd_dupe(s, is_q, rd, rn, imm5);
7609 break;
7610 case 1:
7611 /* DUP (general) */
7612 handle_simd_dupg(s, is_q, rd, rn, imm5);
7613 break;
7614 case 3:
7615 if (is_q) {
7616 /* INS (general) */
7617 handle_simd_insg(s, rd, rn, imm5);
7618 } else {
7619 unallocated_encoding(s);
7621 break;
7622 case 5:
7623 case 7:
7624 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7625 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7626 break;
7627 default:
7628 unallocated_encoding(s);
7629 break;
7634 /* AdvSIMD modified immediate
7635 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
7636 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7637 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
7638 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7640 * There are a number of operations that can be carried out here:
7641 * MOVI - move (shifted) imm into register
7642 * MVNI - move inverted (shifted) imm into register
7643 * ORR - bitwise OR of (shifted) imm with register
7644 * BIC - bitwise clear of (shifted) imm with register
7645 * With ARMv8.2 we also have:
7646 * FMOV half-precision
7648 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7650 int rd = extract32(insn, 0, 5);
7651 int cmode = extract32(insn, 12, 4);
7652 int cmode_3_1 = extract32(cmode, 1, 3);
7653 int cmode_0 = extract32(cmode, 0, 1);
7654 int o2 = extract32(insn, 11, 1);
7655 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7656 bool is_neg = extract32(insn, 29, 1);
7657 bool is_q = extract32(insn, 30, 1);
7658 uint64_t imm = 0;
7660 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7661 /* Check for FMOV (vector, immediate) - half-precision */
7662 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7663 unallocated_encoding(s);
7664 return;
7668 if (!fp_access_check(s)) {
7669 return;
7672 /* See AdvSIMDExpandImm() in ARM ARM */
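/* For example, with op (is_neg) set and cmode == 0xE each bit of
* abcdefgh selects 0x00 or 0xff for one byte of the 64-bit immediate. */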
7673 switch (cmode_3_1) {
7674 case 0: /* Replicate(Zeros(24):imm8, 2) */
7675 case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
7676 case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
7677 case 3: /* Replicate(imm8:Zeros(24), 2) */
7679 int shift = cmode_3_1 * 8;
7680 imm = bitfield_replicate(abcdefgh << shift, 32);
7681 break;
7683 case 4: /* Replicate(Zeros(8):imm8, 4) */
7684 case 5: /* Replicate(imm8:Zeros(8), 4) */
7686 int shift = (cmode_3_1 & 0x1) * 8;
7687 imm = bitfield_replicate(abcdefgh << shift, 16);
7688 break;
7690 case 6:
7691 if (cmode_0) {
7692 /* Replicate(Zeros(8):imm8:Ones(16), 2) */
7693 imm = (abcdefgh << 16) | 0xffff;
7694 } else {
7695 /* Replicate(Zeros(16):imm8:Ones(8), 2) */
7696 imm = (abcdefgh << 8) | 0xff;
7698 imm = bitfield_replicate(imm, 32);
7699 break;
7700 case 7:
7701 if (!cmode_0 && !is_neg) {
7702 imm = bitfield_replicate(abcdefgh, 8);
7703 } else if (!cmode_0 && is_neg) {
7704 int i;
7705 imm = 0;
7706 for (i = 0; i < 8; i++) {
7707 if ((abcdefgh) & (1 << i)) {
7708 imm |= 0xffULL << (i * 8);
7711 } else if (cmode_0) {
7712 if (is_neg) {
7713 imm = (abcdefgh & 0x3f) << 48;
7714 if (abcdefgh & 0x80) {
7715 imm |= 0x8000000000000000ULL;
7717 if (abcdefgh & 0x40) {
7718 imm |= 0x3fc0000000000000ULL;
7719 } else {
7720 imm |= 0x4000000000000000ULL;
7722 } else {
7723 if (o2) {
7724 /* FMOV (vector, immediate) - half-precision */
7725 imm = vfp_expand_imm(MO_16, abcdefgh);
7726 /* now duplicate across the lanes */
7727 imm = bitfield_replicate(imm, 16);
7728 } else {
7729 imm = (abcdefgh & 0x3f) << 19;
7730 if (abcdefgh & 0x80) {
7731 imm |= 0x80000000;
7733 if (abcdefgh & 0x40) {
7734 imm |= 0x3e000000;
7735 } else {
7736 imm |= 0x40000000;
7738 imm |= (imm << 32);
7742 break;
7743 default:
7744 fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
7745 g_assert_not_reached();
7748 if (cmode_3_1 != 7 && is_neg) {
7749 imm = ~imm;
7752 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7753 /* MOVI or MVNI, with MVNI negation handled above. */
7754 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7755 vec_full_reg_size(s), imm);
7756 } else {
7757 /* ORR or BIC, with BIC negation to AND handled above. */
7758 if (is_neg) {
7759 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7760 } else {
7761 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7766 /* AdvSIMD scalar copy
7767 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7768 * +-----+----+-----------------+------+---+------+---+------+------+
7769 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7770 * +-----+----+-----------------+------+---+------+---+------+------+
7772 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7774 int rd = extract32(insn, 0, 5);
7775 int rn = extract32(insn, 5, 5);
7776 int imm4 = extract32(insn, 11, 4);
7777 int imm5 = extract32(insn, 16, 5);
7778 int op = extract32(insn, 29, 1);
7780 if (op != 0 || imm4 != 0) {
7781 unallocated_encoding(s);
7782 return;
7785 /* DUP (element, scalar) */
7786 handle_simd_dupes(s, rd, rn, imm5);
7789 /* AdvSIMD scalar pairwise
7790 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7791 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7792 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7793 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7795 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7797 int u = extract32(insn, 29, 1);
7798 int size = extract32(insn, 22, 2);
7799 int opcode = extract32(insn, 12, 5);
7800 int rn = extract32(insn, 5, 5);
7801 int rd = extract32(insn, 0, 5);
7802 TCGv_ptr fpst;
7804 /* For some ops (the FP ones), size[1] is part of the encoding.
7805 * For ADDP strictly it is not but size[1] is always 1 for valid
7806 * encodings.
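* Folding size[1] into bit 5 of opcode lets the switch below distinguish,
* for example, FMAXNMP (0x0c) from FMINNMP (0x2c).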
7808 opcode |= (extract32(size, 1, 1) << 5);
7810 switch (opcode) {
7811 case 0x3b: /* ADDP */
7812 if (u || size != 3) {
7813 unallocated_encoding(s);
7814 return;
7816 if (!fp_access_check(s)) {
7817 return;
7820 fpst = NULL;
7821 break;
7822 case 0xc: /* FMAXNMP */
7823 case 0xd: /* FADDP */
7824 case 0xf: /* FMAXP */
7825 case 0x2c: /* FMINNMP */
7826 case 0x2f: /* FMINP */
7827 /* FP op, size[0] is 32 or 64 bit */
7828 if (!u) {
7829 if (!dc_isar_feature(aa64_fp16, s)) {
7830 unallocated_encoding(s);
7831 return;
7832 } else {
7833 size = MO_16;
7835 } else {
7836 size = extract32(size, 0, 1) ? MO_64 : MO_32;
7839 if (!fp_access_check(s)) {
7840 return;
7843 fpst = get_fpstatus_ptr(size == MO_16);
7844 break;
7845 default:
7846 unallocated_encoding(s);
7847 return;
7850 if (size == MO_64) {
7851 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7852 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7853 TCGv_i64 tcg_res = tcg_temp_new_i64();
7855 read_vec_element(s, tcg_op1, rn, 0, MO_64);
7856 read_vec_element(s, tcg_op2, rn, 1, MO_64);
7858 switch (opcode) {
7859 case 0x3b: /* ADDP */
7860 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7861 break;
7862 case 0xc: /* FMAXNMP */
7863 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7864 break;
7865 case 0xd: /* FADDP */
7866 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7867 break;
7868 case 0xf: /* FMAXP */
7869 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7870 break;
7871 case 0x2c: /* FMINNMP */
7872 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7873 break;
7874 case 0x2f: /* FMINP */
7875 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7876 break;
7877 default:
7878 g_assert_not_reached();
7881 write_fp_dreg(s, rd, tcg_res);
7883 tcg_temp_free_i64(tcg_op1);
7884 tcg_temp_free_i64(tcg_op2);
7885 tcg_temp_free_i64(tcg_res);
7886 } else {
7887 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7888 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7889 TCGv_i32 tcg_res = tcg_temp_new_i32();
7891 read_vec_element_i32(s, tcg_op1, rn, 0, size);
7892 read_vec_element_i32(s, tcg_op2, rn, 1, size);
7894 if (size == MO_16) {
7895 switch (opcode) {
7896 case 0xc: /* FMAXNMP */
7897 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7898 break;
7899 case 0xd: /* FADDP */
7900 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7901 break;
7902 case 0xf: /* FMAXP */
7903 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7904 break;
7905 case 0x2c: /* FMINNMP */
7906 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7907 break;
7908 case 0x2f: /* FMINP */
7909 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7910 break;
7911 default:
7912 g_assert_not_reached();
7914 } else {
7915 switch (opcode) {
7916 case 0xc: /* FMAXNMP */
7917 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7918 break;
7919 case 0xd: /* FADDP */
7920 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7921 break;
7922 case 0xf: /* FMAXP */
7923 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7924 break;
7925 case 0x2c: /* FMINNMP */
7926 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7927 break;
7928 case 0x2f: /* FMINP */
7929 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7930 break;
7931 default:
7932 g_assert_not_reached();
7936 write_fp_sreg(s, rd, tcg_res);
7938 tcg_temp_free_i32(tcg_op1);
7939 tcg_temp_free_i32(tcg_op2);
7940 tcg_temp_free_i32(tcg_res);
7943 if (fpst) {
7944 tcg_temp_free_ptr(fpst);
7949 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7951 * This code handles the common shift logic and is used by both
7952 * the vector and scalar code.
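* With rounding, the result is (src + (1 << (shift - 1))) >> shift; for
* 64-bit elements that addition can carry out of 64 bits, so a 128-bit
* intermediate is built in {tcg_src_hi, tcg_src} before shifting.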
7954 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7955 TCGv_i64 tcg_rnd, bool accumulate,
7956 bool is_u, int size, int shift)
7958 bool extended_result = false;
7959 bool round = tcg_rnd != NULL;
7960 int ext_lshift = 0;
7961 TCGv_i64 tcg_src_hi;
7963 if (round && size == 3) {
7964 extended_result = true;
7965 ext_lshift = 64 - shift;
7966 tcg_src_hi = tcg_temp_new_i64();
7967 } else if (shift == 64) {
7968 if (!accumulate && is_u) {
7969 /* result is zero */
7970 tcg_gen_movi_i64(tcg_res, 0);
7971 return;
7975 /* Deal with the rounding step */
7976 if (round) {
7977 if (extended_result) {
7978 TCGv_i64 tcg_zero = tcg_const_i64(0);
7979 if (!is_u) {
7980 /* sign-extend tcg_src into the high half tcg_src_hi */
7981 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7982 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7983 tcg_src, tcg_src_hi,
7984 tcg_rnd, tcg_zero);
7985 } else {
7986 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7987 tcg_src, tcg_zero,
7988 tcg_rnd, tcg_zero);
7990 tcg_temp_free_i64(tcg_zero);
7991 } else {
7992 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7996 /* Now do the shift right */
7997 if (round && extended_result) {
7998 /* extended case, >64 bit precision required */
7999 if (ext_lshift == 0) {
8000 /* special case, only high bits matter */
8001 tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8002 } else {
8003 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8004 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8005 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8007 } else {
8008 if (is_u) {
8009 if (shift == 64) {
8010 /* essentially shifting in 64 zeros */
8011 tcg_gen_movi_i64(tcg_src, 0);
8012 } else {
8013 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8015 } else {
8016 if (shift == 64) {
8017 /* effectively extending the sign-bit */
8018 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8019 } else {
8020 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8025 if (accumulate) {
8026 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8027 } else {
8028 tcg_gen_mov_i64(tcg_res, tcg_src);
8031 if (extended_result) {
8032 tcg_temp_free_i64(tcg_src_hi);
8036 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8037 static void handle_scalar_simd_shri(DisasContext *s,
8038 bool is_u, int immh, int immb,
8039 int opcode, int rn, int rd)
8041 const int size = 3;
8042 int immhb = immh << 3 | immb;
8043 int shift = 2 * (8 << size) - immhb;
8044 bool accumulate = false;
8045 bool round = false;
8046 bool insert = false;
8047 TCGv_i64 tcg_rn;
8048 TCGv_i64 tcg_rd;
8049 TCGv_i64 tcg_round;
8051 if (!extract32(immh, 3, 1)) {
8052 unallocated_encoding(s);
8053 return;
8056 if (!fp_access_check(s)) {
8057 return;
8060 switch (opcode) {
8061 case 0x02: /* SSRA / USRA (accumulate) */
8062 accumulate = true;
8063 break;
8064 case 0x04: /* SRSHR / URSHR (rounding) */
8065 round = true;
8066 break;
8067 case 0x06: /* SRSRA / URSRA (accum + rounding) */
8068 accumulate = round = true;
8069 break;
8070 case 0x08: /* SRI */
8071 insert = true;
8072 break;
8075 if (round) {
8076 uint64_t round_const = 1ULL << (shift - 1);
8077 tcg_round = tcg_const_i64(round_const);
8078 } else {
8079 tcg_round = NULL;
8082 tcg_rn = read_fp_dreg(s, rn);
8083 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8085 if (insert) {
8086 /* A shift count equal to the element size is valid but does nothing;
8087 * special case to avoid potential shift by 64.
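* SRI shifts Rn right and deposits it into the low (esize - shift) bits
* of Rd, leaving the top 'shift' bits of the destination unchanged.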
8089 int esize = 8 << size;
8090 if (shift != esize) {
8091 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8092 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8094 } else {
8095 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8096 accumulate, is_u, size, shift);
8099 write_fp_dreg(s, rd, tcg_rd);
8101 tcg_temp_free_i64(tcg_rn);
8102 tcg_temp_free_i64(tcg_rd);
8103 if (round) {
8104 tcg_temp_free_i64(tcg_round);
8108 /* SHL/SLI - Scalar shift left */
8109 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8110 int immh, int immb, int opcode,
8111 int rn, int rd)
8113 int size = 32 - clz32(immh) - 1;
8114 int immhb = immh << 3 | immb;
8115 int shift = immhb - (8 << size);
8116 TCGv_i64 tcg_rn = new_tmp_a64(s);
8117 TCGv_i64 tcg_rd = new_tmp_a64(s);
8119 if (!extract32(immh, 3, 1)) {
8120 unallocated_encoding(s);
8121 return;
8124 if (!fp_access_check(s)) {
8125 return;
8128 tcg_rn = read_fp_dreg(s, rn);
8129 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8131 if (insert) {
8132 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8133 } else {
8134 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8137 write_fp_dreg(s, rd, tcg_rd);
8139 tcg_temp_free_i64(tcg_rn);
8140 tcg_temp_free_i64(tcg_rd);
8143 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8144 * (signed/unsigned) narrowing */
8145 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8146 bool is_u_shift, bool is_u_narrow,
8147 int immh, int immb, int opcode,
8148 int rn, int rd)
8150 int immhb = immh << 3 | immb;
8151 int size = 32 - clz32(immh) - 1;
8152 int esize = 8 << size;
8153 int shift = (2 * esize) - immhb;
8154 int elements = is_scalar ? 1 : (64 / esize);
8155 bool round = extract32(opcode, 0, 1);
8156 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8157 TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8158 TCGv_i32 tcg_rd_narrowed;
8159 TCGv_i64 tcg_final;
8161 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8162 { gen_helper_neon_narrow_sat_s8,
8163 gen_helper_neon_unarrow_sat8 },
8164 { gen_helper_neon_narrow_sat_s16,
8165 gen_helper_neon_unarrow_sat16 },
8166 { gen_helper_neon_narrow_sat_s32,
8167 gen_helper_neon_unarrow_sat32 },
8168 { NULL, NULL },
8170 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8171 gen_helper_neon_narrow_sat_u8,
8172 gen_helper_neon_narrow_sat_u16,
8173 gen_helper_neon_narrow_sat_u32,
8174 NULL
8176 NeonGenNarrowEnvFn *narrowfn;
8178 int i;
8180 assert(size < 4);
8182 if (extract32(immh, 3, 1)) {
8183 unallocated_encoding(s);
8184 return;
8187 if (!fp_access_check(s)) {
8188 return;
8191 if (is_u_shift) {
8192 narrowfn = unsigned_narrow_fns[size];
8193 } else {
8194 narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8197 tcg_rn = tcg_temp_new_i64();
8198 tcg_rd = tcg_temp_new_i64();
8199 tcg_rd_narrowed = tcg_temp_new_i32();
8200 tcg_final = tcg_const_i64(0);
8202 if (round) {
8203 uint64_t round_const = 1ULL << (shift - 1);
8204 tcg_round = tcg_const_i64(round_const);
8205 } else {
8206 tcg_round = NULL;
8209 for (i = 0; i < elements; i++) {
8210 read_vec_element(s, tcg_rn, rn, i, ldop);
8211 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8212 false, is_u_shift, size+1, shift);
8213 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8214 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8215 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8218 if (!is_q) {
8219 write_vec_element(s, tcg_final, rd, 0, MO_64);
8220 } else {
8221 write_vec_element(s, tcg_final, rd, 1, MO_64);
8224 if (round) {
8225 tcg_temp_free_i64(tcg_round);
8227 tcg_temp_free_i64(tcg_rn);
8228 tcg_temp_free_i64(tcg_rd);
8229 tcg_temp_free_i32(tcg_rd_narrowed);
8230 tcg_temp_free_i64(tcg_final);
8232 clear_vec_high(s, is_q, rd);
8235 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8236 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8237 bool src_unsigned, bool dst_unsigned,
8238 int immh, int immb, int rn, int rd)
8240 int immhb = immh << 3 | immb;
8241 int size = 32 - clz32(immh) - 1;
8242 int shift = immhb - (8 << size);
8243 int pass;
8245 assert(immh != 0);
8246 assert(!(scalar && is_q));
8248 if (!scalar) {
8249 if (!is_q && extract32(immh, 3, 1)) {
8250 unallocated_encoding(s);
8251 return;
8254 /* Since we use the variable-shift helpers we must
8255 * replicate the shift count into each element of
8256 * the tcg_shift value.
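* For byte elements the count ends up in all four byte lanes of the
* 32-bit value, for halfword elements in both 16-bit lanes.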
8258 switch (size) {
8259 case 0:
8260 shift |= shift << 8;
8261 /* fall through */
8262 case 1:
8263 shift |= shift << 16;
8264 break;
8265 case 2:
8266 case 3:
8267 break;
8268 default:
8269 g_assert_not_reached();
8273 if (!fp_access_check(s)) {
8274 return;
8277 if (size == 3) {
8278 TCGv_i64 tcg_shift = tcg_const_i64(shift);
8279 static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8280 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8281 { NULL, gen_helper_neon_qshl_u64 },
8283 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8284 int maxpass = is_q ? 2 : 1;
8286 for (pass = 0; pass < maxpass; pass++) {
8287 TCGv_i64 tcg_op = tcg_temp_new_i64();
8289 read_vec_element(s, tcg_op, rn, pass, MO_64);
8290 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8291 write_vec_element(s, tcg_op, rd, pass, MO_64);
8293 tcg_temp_free_i64(tcg_op);
8295 tcg_temp_free_i64(tcg_shift);
8296 clear_vec_high(s, is_q, rd);
8297 } else {
8298 TCGv_i32 tcg_shift = tcg_const_i32(shift);
8299 static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8301 { gen_helper_neon_qshl_s8,
8302 gen_helper_neon_qshl_s16,
8303 gen_helper_neon_qshl_s32 },
8304 { gen_helper_neon_qshlu_s8,
8305 gen_helper_neon_qshlu_s16,
8306 gen_helper_neon_qshlu_s32 }
8307 }, {
8308 { NULL, NULL, NULL },
8309 { gen_helper_neon_qshl_u8,
8310 gen_helper_neon_qshl_u16,
8311 gen_helper_neon_qshl_u32 }
8314 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8315 MemOp memop = scalar ? size : MO_32;
8316 int maxpass = scalar ? 1 : is_q ? 4 : 2;
8318 for (pass = 0; pass < maxpass; pass++) {
8319 TCGv_i32 tcg_op = tcg_temp_new_i32();
8321 read_vec_element_i32(s, tcg_op, rn, pass, memop);
8322 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8323 if (scalar) {
8324 switch (size) {
8325 case 0:
8326 tcg_gen_ext8u_i32(tcg_op, tcg_op);
8327 break;
8328 case 1:
8329 tcg_gen_ext16u_i32(tcg_op, tcg_op);
8330 break;
8331 case 2:
8332 break;
8333 default:
8334 g_assert_not_reached();
8336 write_fp_sreg(s, rd, tcg_op);
8337 } else {
8338 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8341 tcg_temp_free_i32(tcg_op);
8343 tcg_temp_free_i32(tcg_shift);
8345 if (!scalar) {
8346 clear_vec_high(s, is_q, rd);
8351 /* Common vector code for handling integer to FP conversion */
8352 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8353 int elements, int is_signed,
8354 int fracbits, int size)
8356 TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
8357 TCGv_i32 tcg_shift = NULL;
8359 MemOp mop = size | (is_signed ? MO_SIGN : 0);
8360 int pass;
8362 if (fracbits || size == MO_64) {
8363 tcg_shift = tcg_const_i32(fracbits);
8366 if (size == MO_64) {
8367 TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8368 TCGv_i64 tcg_double = tcg_temp_new_i64();
8370 for (pass = 0; pass < elements; pass++) {
8371 read_vec_element(s, tcg_int64, rn, pass, mop);
8373 if (is_signed) {
8374 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8375 tcg_shift, tcg_fpst);
8376 } else {
8377 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8378 tcg_shift, tcg_fpst);
8380 if (elements == 1) {
8381 write_fp_dreg(s, rd, tcg_double);
8382 } else {
8383 write_vec_element(s, tcg_double, rd, pass, MO_64);
8387 tcg_temp_free_i64(tcg_int64);
8388 tcg_temp_free_i64(tcg_double);
8390 } else {
8391 TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8392 TCGv_i32 tcg_float = tcg_temp_new_i32();
8394 for (pass = 0; pass < elements; pass++) {
8395 read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8397 switch (size) {
8398 case MO_32:
8399 if (fracbits) {
8400 if (is_signed) {
8401 gen_helper_vfp_sltos(tcg_float, tcg_int32,
8402 tcg_shift, tcg_fpst);
8403 } else {
8404 gen_helper_vfp_ultos(tcg_float, tcg_int32,
8405 tcg_shift, tcg_fpst);
8407 } else {
8408 if (is_signed) {
8409 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8410 } else {
8411 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8414 break;
8415 case MO_16:
8416 if (fracbits) {
8417 if (is_signed) {
8418 gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8419 tcg_shift, tcg_fpst);
8420 } else {
8421 gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8422 tcg_shift, tcg_fpst);
8424 } else {
8425 if (is_signed) {
8426 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8427 } else {
8428 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8431 break;
8432 default:
8433 g_assert_not_reached();
8436 if (elements == 1) {
8437 write_fp_sreg(s, rd, tcg_float);
8438 } else {
8439 write_vec_element_i32(s, tcg_float, rd, pass, size);
8443 tcg_temp_free_i32(tcg_int32);
8444 tcg_temp_free_i32(tcg_float);
8447 tcg_temp_free_ptr(tcg_fpst);
8448 if (tcg_shift) {
8449 tcg_temp_free_i32(tcg_shift);
8452 clear_vec_high(s, elements << size == 16, rd);
8455 /* UCVTF/SCVTF - Integer to FP conversion */
8456 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8457 bool is_q, bool is_u,
8458 int immh, int immb, int opcode,
8459 int rn, int rd)
8461 int size, elements, fracbits;
8462 int immhb = immh << 3 | immb;
8464 if (immh & 8) {
8465 size = MO_64;
8466 if (!is_scalar && !is_q) {
8467 unallocated_encoding(s);
8468 return;
8470 } else if (immh & 4) {
8471 size = MO_32;
8472 } else if (immh & 2) {
8473 size = MO_16;
8474 if (!dc_isar_feature(aa64_fp16, s)) {
8475 unallocated_encoding(s);
8476 return;
8478 } else {
8479 /* immh == 0 would be a failure of the decode logic */
8480 g_assert(immh == 1);
8481 unallocated_encoding(s);
8482 return;
8485 if (is_scalar) {
8486 elements = 1;
8487 } else {
8488 elements = (8 << is_q) >> size;
8490 fracbits = (16 << size) - immhb;
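/* For example with 32-bit elements immh:immb lies in 32..63, so this
 * gives between 1 and 32 fraction bits; the pure integer SCVTF/UCVTF
 * forms are decoded elsewhere and call handle_simd_intfp_conv with
 * fracbits == 0.
 */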
8492 if (!fp_access_check(s)) {
8493 return;
8496 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8499 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
8500 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8501 bool is_q, bool is_u,
8502 int immh, int immb, int rn, int rd)
8504 int immhb = immh << 3 | immb;
8505 int pass, size, fracbits;
8506 TCGv_ptr tcg_fpstatus;
8507 TCGv_i32 tcg_rmode, tcg_shift;
8509 if (immh & 0x8) {
8510 size = MO_64;
8511 if (!is_scalar && !is_q) {
8512 unallocated_encoding(s);
8513 return;
8515 } else if (immh & 0x4) {
8516 size = MO_32;
8517 } else if (immh & 0x2) {
8518 size = MO_16;
8519 if (!dc_isar_feature(aa64_fp16, s)) {
8520 unallocated_encoding(s);
8521 return;
8523 } else {
8524 /* Should have split out AdvSIMD modified immediate earlier. */
8525 assert(immh == 1);
8526 unallocated_encoding(s);
8527 return;
8530 if (!fp_access_check(s)) {
8531 return;
8534 assert(!(is_scalar && is_q));
8536 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
8537 tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
8538 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8539 fracbits = (16 << size) - immhb;
8540 tcg_shift = tcg_const_i32(fracbits);
8542 if (size == MO_64) {
8543 int maxpass = is_scalar ? 1 : 2;
8545 for (pass = 0; pass < maxpass; pass++) {
8546 TCGv_i64 tcg_op = tcg_temp_new_i64();
8548 read_vec_element(s, tcg_op, rn, pass, MO_64);
8549 if (is_u) {
8550 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8551 } else {
8552 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8554 write_vec_element(s, tcg_op, rd, pass, MO_64);
8555 tcg_temp_free_i64(tcg_op);
8557 clear_vec_high(s, is_q, rd);
8558 } else {
8559 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8560 int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8562 switch (size) {
8563 case MO_16:
8564 if (is_u) {
8565 fn = gen_helper_vfp_touhh;
8566 } else {
8567 fn = gen_helper_vfp_toshh;
8569 break;
8570 case MO_32:
8571 if (is_u) {
8572 fn = gen_helper_vfp_touls;
8573 } else {
8574 fn = gen_helper_vfp_tosls;
8576 break;
8577 default:
8578 g_assert_not_reached();
8581 for (pass = 0; pass < maxpass; pass++) {
8582 TCGv_i32 tcg_op = tcg_temp_new_i32();
8584 read_vec_element_i32(s, tcg_op, rn, pass, size);
8585 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8586 if (is_scalar) {
8587 write_fp_sreg(s, rd, tcg_op);
8588 } else {
8589 write_vec_element_i32(s, tcg_op, rd, pass, size);
8591 tcg_temp_free_i32(tcg_op);
8593 if (!is_scalar) {
8594 clear_vec_high(s, is_q, rd);
8598 tcg_temp_free_ptr(tcg_fpstatus);
8599 tcg_temp_free_i32(tcg_shift);
8600 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8601 tcg_temp_free_i32(tcg_rmode);
8604 /* AdvSIMD scalar shift by immediate
8605 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
8606 * +-----+---+-------------+------+------+--------+---+------+------+
8607 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
8608 * +-----+---+-------------+------+------+--------+---+------+------+
8610 * This is the scalar version so it works on fixed size registers
8612 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8614 int rd = extract32(insn, 0, 5);
8615 int rn = extract32(insn, 5, 5);
8616 int opcode = extract32(insn, 11, 5);
8617 int immb = extract32(insn, 16, 3);
8618 int immh = extract32(insn, 19, 4);
8619 bool is_u = extract32(insn, 29, 1);
8621 if (immh == 0) {
8622 unallocated_encoding(s);
8623 return;
8626 switch (opcode) {
8627 case 0x08: /* SRI */
8628 if (!is_u) {
8629 unallocated_encoding(s);
8630 return;
8632 /* fall through */
8633 case 0x00: /* SSHR / USHR */
8634 case 0x02: /* SSRA / USRA */
8635 case 0x04: /* SRSHR / URSHR */
8636 case 0x06: /* SRSRA / URSRA */
8637 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8638 break;
8639 case 0x0a: /* SHL / SLI */
8640 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8641 break;
8642 case 0x1c: /* SCVTF, UCVTF */
8643 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8644 opcode, rn, rd);
8645 break;
8646 case 0x10: /* SQSHRUN, SQSHRUN2 */
8647 case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8648 if (!is_u) {
8649 unallocated_encoding(s);
8650 return;
8652 handle_vec_simd_sqshrn(s, true, false, false, true,
8653 immh, immb, opcode, rn, rd);
8654 break;
8655 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8656 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8657 handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8658 immh, immb, opcode, rn, rd);
8659 break;
8660 case 0xc: /* SQSHLU */
8661 if (!is_u) {
8662 unallocated_encoding(s);
8663 return;
8665 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8666 break;
8667 case 0xe: /* SQSHL, UQSHL */
8668 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8669 break;
8670 case 0x1f: /* FCVTZS, FCVTZU */
8671 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8672 break;
8673 default:
8674 unallocated_encoding(s);
8675 break;
8679 /* AdvSIMD scalar three different
8680 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
8681 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8682 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
8683 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8685 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8687 bool is_u = extract32(insn, 29, 1);
8688 int size = extract32(insn, 22, 2);
8689 int opcode = extract32(insn, 12, 4);
8690 int rm = extract32(insn, 16, 5);
8691 int rn = extract32(insn, 5, 5);
8692 int rd = extract32(insn, 0, 5);
8694 if (is_u) {
8695 unallocated_encoding(s);
8696 return;
8699 switch (opcode) {
8700 case 0x9: /* SQDMLAL, SQDMLAL2 */
8701 case 0xb: /* SQDMLSL, SQDMLSL2 */
8702 case 0xd: /* SQDMULL, SQDMULL2 */
8703 if (size == 0 || size == 3) {
8704 unallocated_encoding(s);
8705 return;
8707 break;
8708 default:
8709 unallocated_encoding(s);
8710 return;
8713 if (!fp_access_check(s)) {
8714 return;
8717 if (size == 2) {
8718 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8719 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8720 TCGv_i64 tcg_res = tcg_temp_new_i64();
8722 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8723 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8725 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8726 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8728 switch (opcode) {
8729 case 0xd: /* SQDMULL, SQDMULL2 */
8730 break;
8731 case 0xb: /* SQDMLSL, SQDMLSL2 */
8732 tcg_gen_neg_i64(tcg_res, tcg_res);
8733 /* fall through */
8734 case 0x9: /* SQDMLAL, SQDMLAL2 */
8735 read_vec_element(s, tcg_op1, rd, 0, MO_64);
8736 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8737 tcg_res, tcg_op1);
8738 break;
8739 default:
8740 g_assert_not_reached();
8743 write_fp_dreg(s, rd, tcg_res);
8745 tcg_temp_free_i64(tcg_op1);
8746 tcg_temp_free_i64(tcg_op2);
8747 tcg_temp_free_i64(tcg_res);
8748 } else {
8749 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8750 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8751 TCGv_i64 tcg_res = tcg_temp_new_i64();
8753 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8754 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8756 switch (opcode) {
8757 case 0xd: /* SQDMULL, SQDMULL2 */
8758 break;
8759 case 0xb: /* SQDMLSL, SQDMLSL2 */
8760 gen_helper_neon_negl_u32(tcg_res, tcg_res);
8761 /* fall through */
8762 case 0x9: /* SQDMLAL, SQDMLAL2 */
8764 TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8765 read_vec_element(s, tcg_op3, rd, 0, MO_32);
8766 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8767 tcg_res, tcg_op3);
8768 tcg_temp_free_i64(tcg_op3);
8769 break;
8771 default:
8772 g_assert_not_reached();
8775 tcg_gen_ext32u_i64(tcg_res, tcg_res);
8776 write_fp_dreg(s, rd, tcg_res);
8778 tcg_temp_free_i32(tcg_op1);
8779 tcg_temp_free_i32(tcg_op2);
8780 tcg_temp_free_i64(tcg_res);
8784 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8785 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8787 /* Handle 64x64->64 opcodes which are shared between the scalar
8788 * and vector 3-same groups. We cover every opcode where size == 3
8789 * is valid in either the three-reg-same (integer, not pairwise)
8790 * or scalar-three-reg-same groups.
8792 TCGCond cond;
8794 switch (opcode) {
8795 case 0x1: /* SQADD */
8796 if (u) {
8797 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8798 } else {
8799 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8801 break;
8802 case 0x5: /* SQSUB */
8803 if (u) {
8804 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8805 } else {
8806 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8808 break;
8809 case 0x6: /* CMGT, CMHI */
8810 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8811 * We implement this using setcond (test) and then negating.
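/* setcond produces 0 or 1, and negating 1 in two's complement yields
 * the all-ones value 2^64 - 1.
 */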
8813 cond = u ? TCG_COND_GTU : TCG_COND_GT;
8814 do_cmop:
8815 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8816 tcg_gen_neg_i64(tcg_rd, tcg_rd);
8817 break;
8818 case 0x7: /* CMGE, CMHS */
8819 cond = u ? TCG_COND_GEU : TCG_COND_GE;
8820 goto do_cmop;
8821 case 0x11: /* CMTST, CMEQ */
8822 if (u) {
8823 cond = TCG_COND_EQ;
8824 goto do_cmop;
8826 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8827 break;
8828 case 0x8: /* SSHL, USHL */
8829 if (u) {
8830 gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8831 } else {
8832 gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8834 break;
8835 case 0x9: /* SQSHL, UQSHL */
8836 if (u) {
8837 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8838 } else {
8839 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8841 break;
8842 case 0xa: /* SRSHL, URSHL */
8843 if (u) {
8844 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8845 } else {
8846 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8848 break;
8849 case 0xb: /* SQRSHL, UQRSHL */
8850 if (u) {
8851 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8852 } else {
8853 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8855 break;
8856 case 0x10: /* ADD, SUB */
8857 if (u) {
8858 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8859 } else {
8860 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8862 break;
8863 default:
8864 g_assert_not_reached();
8868 /* Handle the 3-same-operands float operations; shared by the scalar
8869 * and vector encodings. The caller must filter out any encodings
8870 * not allocated for the encoding it is dealing with.
8872 static void handle_3same_float(DisasContext *s, int size, int elements,
8873 int fpopcode, int rd, int rn, int rm)
8875 int pass;
8876 TCGv_ptr fpst = get_fpstatus_ptr(false);
8878 for (pass = 0; pass < elements; pass++) {
8879 if (size) {
8880 /* Double */
8881 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8882 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8883 TCGv_i64 tcg_res = tcg_temp_new_i64();
8885 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8886 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8888 switch (fpopcode) {
8889 case 0x39: /* FMLS */
8890 /* As usual for ARM, separate negation for fused multiply-add */
8891 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8892 /* fall through */
8893 case 0x19: /* FMLA */
8894 read_vec_element(s, tcg_res, rd, pass, MO_64);
8895 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8896 tcg_res, fpst);
8897 break;
8898 case 0x18: /* FMAXNM */
8899 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8900 break;
8901 case 0x1a: /* FADD */
8902 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8903 break;
8904 case 0x1b: /* FMULX */
8905 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8906 break;
8907 case 0x1c: /* FCMEQ */
8908 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8909 break;
8910 case 0x1e: /* FMAX */
8911 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8912 break;
8913 case 0x1f: /* FRECPS */
8914 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8915 break;
8916 case 0x38: /* FMINNM */
8917 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8918 break;
8919 case 0x3a: /* FSUB */
8920 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8921 break;
8922 case 0x3e: /* FMIN */
8923 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8924 break;
8925 case 0x3f: /* FRSQRTS */
8926 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8927 break;
8928 case 0x5b: /* FMUL */
8929 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8930 break;
8931 case 0x5c: /* FCMGE */
8932 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8933 break;
8934 case 0x5d: /* FACGE */
8935 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8936 break;
8937 case 0x5f: /* FDIV */
8938 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8939 break;
8940 case 0x7a: /* FABD */
8941 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8942 gen_helper_vfp_absd(tcg_res, tcg_res);
8943 break;
8944 case 0x7c: /* FCMGT */
8945 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8946 break;
8947 case 0x7d: /* FACGT */
8948 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8949 break;
8950 default:
8951 g_assert_not_reached();
8954 write_vec_element(s, tcg_res, rd, pass, MO_64);
8956 tcg_temp_free_i64(tcg_res);
8957 tcg_temp_free_i64(tcg_op1);
8958 tcg_temp_free_i64(tcg_op2);
8959 } else {
8960 /* Single */
8961 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8962 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8963 TCGv_i32 tcg_res = tcg_temp_new_i32();
8965 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8966 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8968 switch (fpopcode) {
8969 case 0x39: /* FMLS */
8970 /* As usual for ARM, separate negation for fused multiply-add */
8971 gen_helper_vfp_negs(tcg_op1, tcg_op1);
8972 /* fall through */
8973 case 0x19: /* FMLA */
8974 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8975 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8976 tcg_res, fpst);
8977 break;
8978 case 0x1a: /* FADD */
8979 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8980 break;
8981 case 0x1b: /* FMULX */
8982 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8983 break;
8984 case 0x1c: /* FCMEQ */
8985 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8986 break;
8987 case 0x1e: /* FMAX */
8988 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8989 break;
8990 case 0x1f: /* FRECPS */
8991 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8992 break;
8993 case 0x18: /* FMAXNM */
8994 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8995 break;
8996 case 0x38: /* FMINNM */
8997 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8998 break;
8999 case 0x3a: /* FSUB */
9000 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9001 break;
9002 case 0x3e: /* FMIN */
9003 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9004 break;
9005 case 0x3f: /* FRSQRTS */
9006 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9007 break;
9008 case 0x5b: /* FMUL */
9009 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9010 break;
9011 case 0x5c: /* FCMGE */
9012 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9013 break;
9014 case 0x5d: /* FACGE */
9015 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9016 break;
9017 case 0x5f: /* FDIV */
9018 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9019 break;
9020 case 0x7a: /* FABD */
9021 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9022 gen_helper_vfp_abss(tcg_res, tcg_res);
9023 break;
9024 case 0x7c: /* FCMGT */
9025 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9026 break;
9027 case 0x7d: /* FACGT */
9028 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9029 break;
9030 default:
9031 g_assert_not_reached();
9034 if (elements == 1) {
9035 /* scalar single so clear high part */
9036 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9038 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9039 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9040 tcg_temp_free_i64(tcg_tmp);
9041 } else {
9042 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9045 tcg_temp_free_i32(tcg_res);
9046 tcg_temp_free_i32(tcg_op1);
9047 tcg_temp_free_i32(tcg_op2);
9051 tcg_temp_free_ptr(fpst);
9053 clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9056 /* AdvSIMD scalar three same
9057 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
9058 * +-----+---+-----------+------+---+------+--------+---+------+------+
9059 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
9060 * +-----+---+-----------+------+---+------+--------+---+------+------+
9062 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9064 int rd = extract32(insn, 0, 5);
9065 int rn = extract32(insn, 5, 5);
9066 int opcode = extract32(insn, 11, 5);
9067 int rm = extract32(insn, 16, 5);
9068 int size = extract32(insn, 22, 2);
9069 bool u = extract32(insn, 29, 1);
9070 TCGv_i64 tcg_rd;
9072 if (opcode >= 0x18) {
9073 /* Floating point: U, size[1] and opcode indicate operation */
9074 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
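/* For example FABD is opcode 0x1a with U == 1 and size<1> == 1,
 * giving fpopcode 0x7a in the switch below.
 */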
9075 switch (fpopcode) {
9076 case 0x1b: /* FMULX */
9077 case 0x1f: /* FRECPS */
9078 case 0x3f: /* FRSQRTS */
9079 case 0x5d: /* FACGE */
9080 case 0x7d: /* FACGT */
9081 case 0x1c: /* FCMEQ */
9082 case 0x5c: /* FCMGE */
9083 case 0x7c: /* FCMGT */
9084 case 0x7a: /* FABD */
9085 break;
9086 default:
9087 unallocated_encoding(s);
9088 return;
9091 if (!fp_access_check(s)) {
9092 return;
9095 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9096 return;
9099 switch (opcode) {
9100 case 0x1: /* SQADD, UQADD */
9101 case 0x5: /* SQSUB, UQSUB */
9102 case 0x9: /* SQSHL, UQSHL */
9103 case 0xb: /* SQRSHL, UQRSHL */
9104 break;
9105 case 0x8: /* SSHL, USHL */
9106 case 0xa: /* SRSHL, URSHL */
9107 case 0x6: /* CMGT, CMHI */
9108 case 0x7: /* CMGE, CMHS */
9109 case 0x11: /* CMTST, CMEQ */
9110 case 0x10: /* ADD, SUB (vector) */
9111 if (size != 3) {
9112 unallocated_encoding(s);
9113 return;
9115 break;
9116 case 0x16: /* SQDMULH, SQRDMULH (vector) */
9117 if (size != 1 && size != 2) {
9118 unallocated_encoding(s);
9119 return;
9121 break;
9122 default:
9123 unallocated_encoding(s);
9124 return;
9127 if (!fp_access_check(s)) {
9128 return;
9131 tcg_rd = tcg_temp_new_i64();
9133 if (size == 3) {
9134 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9135 TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9137 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9138 tcg_temp_free_i64(tcg_rn);
9139 tcg_temp_free_i64(tcg_rm);
9140 } else {
9141 /* Do a single operation on the lowest element in the vector.
9142 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9143 * no side effects for all these operations.
9144 * OPTME: special-purpose helpers would avoid doing some
9145 * unnecessary work in the helper for the 8 and 16 bit cases.
9147 NeonGenTwoOpEnvFn *genenvfn;
9148 TCGv_i32 tcg_rn = tcg_temp_new_i32();
9149 TCGv_i32 tcg_rm = tcg_temp_new_i32();
9150 TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9152 read_vec_element_i32(s, tcg_rn, rn, 0, size);
9153 read_vec_element_i32(s, tcg_rm, rm, 0, size);
9155 switch (opcode) {
9156 case 0x1: /* SQADD, UQADD */
9158 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9159 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9160 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9161 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9163 genenvfn = fns[size][u];
9164 break;
9166 case 0x5: /* SQSUB, UQSUB */
9168 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9169 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9170 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9171 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9173 genenvfn = fns[size][u];
9174 break;
9176 case 0x9: /* SQSHL, UQSHL */
9178 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9179 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9180 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9181 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9183 genenvfn = fns[size][u];
9184 break;
9186 case 0xb: /* SQRSHL, UQRSHL */
9188 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9189 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9190 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9191 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9193 genenvfn = fns[size][u];
9194 break;
9196 case 0x16: /* SQDMULH, SQRDMULH */
9198 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9199 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9200 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9202 assert(size == 1 || size == 2);
9203 genenvfn = fns[size - 1][u];
9204 break;
9206 default:
9207 g_assert_not_reached();
9210 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9211 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9212 tcg_temp_free_i32(tcg_rd32);
9213 tcg_temp_free_i32(tcg_rn);
9214 tcg_temp_free_i32(tcg_rm);
9217 write_fp_dreg(s, rd, tcg_rd);
9219 tcg_temp_free_i64(tcg_rd);
9222 /* AdvSIMD scalar three same FP16
9223 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
9224 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9225 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
9226 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9227 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9228 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9230 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9231 uint32_t insn)
9233 int rd = extract32(insn, 0, 5);
9234 int rn = extract32(insn, 5, 5);
9235 int opcode = extract32(insn, 11, 3);
9236 int rm = extract32(insn, 16, 5);
9237 bool u = extract32(insn, 29, 1);
9238 bool a = extract32(insn, 23, 1);
9239 int fpopcode = opcode | (a << 3) | (u << 4);
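/* For example the FABD case (0x1a) below corresponds to opcode 0x2
 * with a == 1 and u == 1.
 */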
9240 TCGv_ptr fpst;
9241 TCGv_i32 tcg_op1;
9242 TCGv_i32 tcg_op2;
9243 TCGv_i32 tcg_res;
9245 switch (fpopcode) {
9246 case 0x03: /* FMULX */
9247 case 0x04: /* FCMEQ (reg) */
9248 case 0x07: /* FRECPS */
9249 case 0x0f: /* FRSQRTS */
9250 case 0x14: /* FCMGE (reg) */
9251 case 0x15: /* FACGE */
9252 case 0x1a: /* FABD */
9253 case 0x1c: /* FCMGT (reg) */
9254 case 0x1d: /* FACGT */
9255 break;
9256 default:
9257 unallocated_encoding(s);
9258 return;
9261 if (!dc_isar_feature(aa64_fp16, s)) {
9262 unallocated_encoding(s);
9265 if (!fp_access_check(s)) {
9266 return;
9269 fpst = get_fpstatus_ptr(true);
9271 tcg_op1 = read_fp_hreg(s, rn);
9272 tcg_op2 = read_fp_hreg(s, rm);
9273 tcg_res = tcg_temp_new_i32();
9275 switch (fpopcode) {
9276 case 0x03: /* FMULX */
9277 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9278 break;
9279 case 0x04: /* FCMEQ (reg) */
9280 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9281 break;
9282 case 0x07: /* FRECPS */
9283 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9284 break;
9285 case 0x0f: /* FRSQRTS */
9286 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9287 break;
9288 case 0x14: /* FCMGE (reg) */
9289 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9290 break;
9291 case 0x15: /* FACGE */
9292 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9293 break;
9294 case 0x1a: /* FABD */
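/* Implemented as a subtract followed by clearing the f16 sign bit
 * (the AND with 0x7fff below), i.e. |a - b|.
 */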
9295 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9296 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9297 break;
9298 case 0x1c: /* FCMGT (reg) */
9299 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9300 break;
9301 case 0x1d: /* FACGT */
9302 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9303 break;
9304 default:
9305 g_assert_not_reached();
9308 write_fp_sreg(s, rd, tcg_res);
9311 tcg_temp_free_i32(tcg_res);
9312 tcg_temp_free_i32(tcg_op1);
9313 tcg_temp_free_i32(tcg_op2);
9314 tcg_temp_free_ptr(fpst);
9317 /* AdvSIMD scalar three same extra
9318 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
9319 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9320 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
9321 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9323 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9324 uint32_t insn)
9326 int rd = extract32(insn, 0, 5);
9327 int rn = extract32(insn, 5, 5);
9328 int opcode = extract32(insn, 11, 4);
9329 int rm = extract32(insn, 16, 5);
9330 int size = extract32(insn, 22, 2);
9331 bool u = extract32(insn, 29, 1);
9332 TCGv_i32 ele1, ele2, ele3;
9333 TCGv_i64 res;
9334 bool feature;
9336 switch (u * 16 + opcode) {
9337 case 0x10: /* SQRDMLAH (vector) */
9338 case 0x11: /* SQRDMLSH (vector) */
9339 if (size != 1 && size != 2) {
9340 unallocated_encoding(s);
9341 return;
9343 feature = dc_isar_feature(aa64_rdm, s);
9344 break;
9345 default:
9346 unallocated_encoding(s);
9347 return;
9349 if (!feature) {
9350 unallocated_encoding(s);
9351 return;
9353 if (!fp_access_check(s)) {
9354 return;
9357 /* Do a single operation on the lowest element in the vector.
9358 * We use the standard Neon helpers and rely on 0 OP 0 == 0
9359 * with no side effects for all these operations.
9360 * OPTME: special-purpose helpers would avoid doing some
9361 * unnecessary work in the helper for the 16 bit cases.
9363 ele1 = tcg_temp_new_i32();
9364 ele2 = tcg_temp_new_i32();
9365 ele3 = tcg_temp_new_i32();
9367 read_vec_element_i32(s, ele1, rn, 0, size);
9368 read_vec_element_i32(s, ele2, rm, 0, size);
9369 read_vec_element_i32(s, ele3, rd, 0, size);
9371 switch (opcode) {
9372 case 0x0: /* SQRDMLAH */
9373 if (size == 1) {
9374 gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9375 } else {
9376 gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9378 break;
9379 case 0x1: /* SQRDMLSH */
9380 if (size == 1) {
9381 gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9382 } else {
9383 gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9385 break;
9386 default:
9387 g_assert_not_reached();
9389 tcg_temp_free_i32(ele1);
9390 tcg_temp_free_i32(ele2);
9392 res = tcg_temp_new_i64();
9393 tcg_gen_extu_i32_i64(res, ele3);
9394 tcg_temp_free_i32(ele3);
9396 write_fp_dreg(s, rd, res);
9397 tcg_temp_free_i64(res);
9400 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9401 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9402 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9404 /* Handle 64->64 opcodes which are shared between the scalar and
9405 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9406 * is valid in either group and also the double-precision fp ops.
9407 * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9408 * requires them.
9410 TCGCond cond;
9412 switch (opcode) {
9413 case 0x4: /* CLS, CLZ */
9414 if (u) {
9415 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9416 } else {
9417 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9419 break;
9420 case 0x5: /* NOT */
9421 /* This opcode is shared with CNT and RBIT but we have earlier
9422 * enforced that size == 3 if and only if this is the NOT insn.
9424 tcg_gen_not_i64(tcg_rd, tcg_rn);
9425 break;
9426 case 0x7: /* SQABS, SQNEG */
9427 if (u) {
9428 gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9429 } else {
9430 gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9432 break;
9433 case 0xa: /* CMLT */
9434 /* 64 bit integer comparison against zero, result is
9435 * test ? (2^64 - 1) : 0. We implement this using setcond (test)
9436 * and then negating, as for the three-register compares above.
9438 cond = TCG_COND_LT;
9439 do_cmop:
9440 tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9441 tcg_gen_neg_i64(tcg_rd, tcg_rd);
9442 break;
9443 case 0x8: /* CMGT, CMGE */
9444 cond = u ? TCG_COND_GE : TCG_COND_GT;
9445 goto do_cmop;
9446 case 0x9: /* CMEQ, CMLE */
9447 cond = u ? TCG_COND_LE : TCG_COND_EQ;
9448 goto do_cmop;
9449 case 0xb: /* ABS, NEG */
9450 if (u) {
9451 tcg_gen_neg_i64(tcg_rd, tcg_rn);
9452 } else {
9453 tcg_gen_abs_i64(tcg_rd, tcg_rn);
9455 break;
9456 case 0x2f: /* FABS */
9457 gen_helper_vfp_absd(tcg_rd, tcg_rn);
9458 break;
9459 case 0x6f: /* FNEG */
9460 gen_helper_vfp_negd(tcg_rd, tcg_rn);
9461 break;
9462 case 0x7f: /* FSQRT */
9463 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9464 break;
9465 case 0x1a: /* FCVTNS */
9466 case 0x1b: /* FCVTMS */
9467 case 0x1c: /* FCVTAS */
9468 case 0x3a: /* FCVTPS */
9469 case 0x3b: /* FCVTZS */
9471 TCGv_i32 tcg_shift = tcg_const_i32(0);
9472 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9473 tcg_temp_free_i32(tcg_shift);
9474 break;
9476 case 0x5a: /* FCVTNU */
9477 case 0x5b: /* FCVTMU */
9478 case 0x5c: /* FCVTAU */
9479 case 0x7a: /* FCVTPU */
9480 case 0x7b: /* FCVTZU */
9482 TCGv_i32 tcg_shift = tcg_const_i32(0);
9483 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9484 tcg_temp_free_i32(tcg_shift);
9485 break;
9487 case 0x18: /* FRINTN */
9488 case 0x19: /* FRINTM */
9489 case 0x38: /* FRINTP */
9490 case 0x39: /* FRINTZ */
9491 case 0x58: /* FRINTA */
9492 case 0x79: /* FRINTI */
9493 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9494 break;
9495 case 0x59: /* FRINTX */
9496 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9497 break;
9498 case 0x1e: /* FRINT32Z */
9499 case 0x5e: /* FRINT32X */
9500 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9501 break;
9502 case 0x1f: /* FRINT64Z */
9503 case 0x5f: /* FRINT64X */
9504 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9505 break;
9506 default:
9507 g_assert_not_reached();
9511 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9512 bool is_scalar, bool is_u, bool is_q,
9513 int size, int rn, int rd)
9515 bool is_double = (size == MO_64);
9516 TCGv_ptr fpst;
9518 if (!fp_access_check(s)) {
9519 return;
9522 fpst = get_fpstatus_ptr(size == MO_16);
9524 if (is_double) {
9525 TCGv_i64 tcg_op = tcg_temp_new_i64();
9526 TCGv_i64 tcg_zero = tcg_const_i64(0);
9527 TCGv_i64 tcg_res = tcg_temp_new_i64();
9528 NeonGenTwoDoubleOPFn *genfn;
9529 bool swap = false;
9530 int pass;
9532 switch (opcode) {
9533 case 0x2e: /* FCMLT (zero) */
9534 swap = true;
9535 /* fallthrough */
9536 case 0x2c: /* FCMGT (zero) */
9537 genfn = gen_helper_neon_cgt_f64;
9538 break;
9539 case 0x2d: /* FCMEQ (zero) */
9540 genfn = gen_helper_neon_ceq_f64;
9541 break;
9542 case 0x6d: /* FCMLE (zero) */
9543 swap = true;
9544 /* fall through */
9545 case 0x6c: /* FCMGE (zero) */
9546 genfn = gen_helper_neon_cge_f64;
9547 break;
9548 default:
9549 g_assert_not_reached();
9552 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9553 read_vec_element(s, tcg_op, rn, pass, MO_64);
9554 if (swap) {
9555 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9556 } else {
9557 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9559 write_vec_element(s, tcg_res, rd, pass, MO_64);
9561 tcg_temp_free_i64(tcg_res);
9562 tcg_temp_free_i64(tcg_zero);
9563 tcg_temp_free_i64(tcg_op);
9565 clear_vec_high(s, !is_scalar, rd);
9566 } else {
9567 TCGv_i32 tcg_op = tcg_temp_new_i32();
9568 TCGv_i32 tcg_zero = tcg_const_i32(0);
9569 TCGv_i32 tcg_res = tcg_temp_new_i32();
9570 NeonGenTwoSingleOPFn *genfn;
9571 bool swap = false;
9572 int pass, maxpasses;
9574 if (size == MO_16) {
9575 switch (opcode) {
9576 case 0x2e: /* FCMLT (zero) */
9577 swap = true;
9578 /* fall through */
9579 case 0x2c: /* FCMGT (zero) */
9580 genfn = gen_helper_advsimd_cgt_f16;
9581 break;
9582 case 0x2d: /* FCMEQ (zero) */
9583 genfn = gen_helper_advsimd_ceq_f16;
9584 break;
9585 case 0x6d: /* FCMLE (zero) */
9586 swap = true;
9587 /* fall through */
9588 case 0x6c: /* FCMGE (zero) */
9589 genfn = gen_helper_advsimd_cge_f16;
9590 break;
9591 default:
9592 g_assert_not_reached();
9594 } else {
9595 switch (opcode) {
9596 case 0x2e: /* FCMLT (zero) */
9597 swap = true;
9598 /* fall through */
9599 case 0x2c: /* FCMGT (zero) */
9600 genfn = gen_helper_neon_cgt_f32;
9601 break;
9602 case 0x2d: /* FCMEQ (zero) */
9603 genfn = gen_helper_neon_ceq_f32;
9604 break;
9605 case 0x6d: /* FCMLE (zero) */
9606 swap = true;
9607 /* fall through */
9608 case 0x6c: /* FCMGE (zero) */
9609 genfn = gen_helper_neon_cge_f32;
9610 break;
9611 default:
9612 g_assert_not_reached();
9616 if (is_scalar) {
9617 maxpasses = 1;
9618 } else {
9619 int vector_size = 8 << is_q;
9620 maxpasses = vector_size >> size;
9623 for (pass = 0; pass < maxpasses; pass++) {
9624 read_vec_element_i32(s, tcg_op, rn, pass, size);
9625 if (swap) {
9626 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9627 } else {
9628 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9630 if (is_scalar) {
9631 write_fp_sreg(s, rd, tcg_res);
9632 } else {
9633 write_vec_element_i32(s, tcg_res, rd, pass, size);
9636 tcg_temp_free_i32(tcg_res);
9637 tcg_temp_free_i32(tcg_zero);
9638 tcg_temp_free_i32(tcg_op);
9639 if (!is_scalar) {
9640 clear_vec_high(s, is_q, rd);
9644 tcg_temp_free_ptr(fpst);
9647 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9648 bool is_scalar, bool is_u, bool is_q,
9649 int size, int rn, int rd)
9651 bool is_double = (size == 3);
9652 TCGv_ptr fpst = get_fpstatus_ptr(false);
9654 if (is_double) {
9655 TCGv_i64 tcg_op = tcg_temp_new_i64();
9656 TCGv_i64 tcg_res = tcg_temp_new_i64();
9657 int pass;
9659 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9660 read_vec_element(s, tcg_op, rn, pass, MO_64);
9661 switch (opcode) {
9662 case 0x3d: /* FRECPE */
9663 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9664 break;
9665 case 0x3f: /* FRECPX */
9666 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9667 break;
9668 case 0x7d: /* FRSQRTE */
9669 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9670 break;
9671 default:
9672 g_assert_not_reached();
9674 write_vec_element(s, tcg_res, rd, pass, MO_64);
9676 tcg_temp_free_i64(tcg_res);
9677 tcg_temp_free_i64(tcg_op);
9678 clear_vec_high(s, !is_scalar, rd);
9679 } else {
9680 TCGv_i32 tcg_op = tcg_temp_new_i32();
9681 TCGv_i32 tcg_res = tcg_temp_new_i32();
9682 int pass, maxpasses;
9684 if (is_scalar) {
9685 maxpasses = 1;
9686 } else {
9687 maxpasses = is_q ? 4 : 2;
9690 for (pass = 0; pass < maxpasses; pass++) {
9691 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9693 switch (opcode) {
9694 case 0x3c: /* URECPE */
9695 gen_helper_recpe_u32(tcg_res, tcg_op);
9696 break;
9697 case 0x3d: /* FRECPE */
9698 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9699 break;
9700 case 0x3f: /* FRECPX */
9701 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9702 break;
9703 case 0x7d: /* FRSQRTE */
9704 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9705 break;
9706 default:
9707 g_assert_not_reached();
9710 if (is_scalar) {
9711 write_fp_sreg(s, rd, tcg_res);
9712 } else {
9713 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9716 tcg_temp_free_i32(tcg_res);
9717 tcg_temp_free_i32(tcg_op);
9718 if (!is_scalar) {
9719 clear_vec_high(s, is_q, rd);
9722 tcg_temp_free_ptr(fpst);
9725 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9726 int opcode, bool u, bool is_q,
9727 int size, int rn, int rd)
9729 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9730 * in the source becomes a size element in the destination).
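/* For the vector forms this means two 64-bit passes, each producing
 * 32 bits of narrowed result; destelt == 2 for the Q ("...2") variants
 * so those results land in the upper half of Vd.
 */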
9732 int pass;
9733 TCGv_i32 tcg_res[2];
9734 int destelt = is_q ? 2 : 0;
9735 int passes = scalar ? 1 : 2;
9737 if (scalar) {
9738 tcg_res[1] = tcg_const_i32(0);
9741 for (pass = 0; pass < passes; pass++) {
9742 TCGv_i64 tcg_op = tcg_temp_new_i64();
9743 NeonGenNarrowFn *genfn = NULL;
9744 NeonGenNarrowEnvFn *genenvfn = NULL;
9746 if (scalar) {
9747 read_vec_element(s, tcg_op, rn, pass, size + 1);
9748 } else {
9749 read_vec_element(s, tcg_op, rn, pass, MO_64);
9751 tcg_res[pass] = tcg_temp_new_i32();
9753 switch (opcode) {
9754 case 0x12: /* XTN, SQXTUN */
9756 static NeonGenNarrowFn * const xtnfns[3] = {
9757 gen_helper_neon_narrow_u8,
9758 gen_helper_neon_narrow_u16,
9759 tcg_gen_extrl_i64_i32,
9761 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9762 gen_helper_neon_unarrow_sat8,
9763 gen_helper_neon_unarrow_sat16,
9764 gen_helper_neon_unarrow_sat32,
9766 if (u) {
9767 genenvfn = sqxtunfns[size];
9768 } else {
9769 genfn = xtnfns[size];
9771 break;
9773 case 0x14: /* SQXTN, UQXTN */
9775 static NeonGenNarrowEnvFn * const fns[3][2] = {
9776 { gen_helper_neon_narrow_sat_s8,
9777 gen_helper_neon_narrow_sat_u8 },
9778 { gen_helper_neon_narrow_sat_s16,
9779 gen_helper_neon_narrow_sat_u16 },
9780 { gen_helper_neon_narrow_sat_s32,
9781 gen_helper_neon_narrow_sat_u32 },
9783 genenvfn = fns[size][u];
9784 break;
9786 case 0x16: /* FCVTN, FCVTN2 */
9787 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9788 if (size == 2) {
9789 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9790 } else {
9791 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9792 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9793 TCGv_ptr fpst = get_fpstatus_ptr(false);
9794 TCGv_i32 ahp = get_ahp_flag();
9796 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9797 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9798 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9799 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9800 tcg_temp_free_i32(tcg_lo);
9801 tcg_temp_free_i32(tcg_hi);
9802 tcg_temp_free_ptr(fpst);
9803 tcg_temp_free_i32(ahp);
9805 break;
9806 case 0x56: /* FCVTXN, FCVTXN2 */
9807 /* 64 bit to 32 bit float conversion
9808 * with von Neumann rounding (round to odd)
9810 assert(size == 2);
9811 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9812 break;
9813 default:
9814 g_assert_not_reached();
9817 if (genfn) {
9818 genfn(tcg_res[pass], tcg_op);
9819 } else if (genenvfn) {
9820 genenvfn(tcg_res[pass], cpu_env, tcg_op);
9823 tcg_temp_free_i64(tcg_op);
9826 for (pass = 0; pass < 2; pass++) {
9827 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9828 tcg_temp_free_i32(tcg_res[pass]);
9830 clear_vec_high(s, is_q, rd);
9833 /* Remaining saturating accumulating ops */
9834 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9835 bool is_q, int size, int rn, int rd)
9837 bool is_double = (size == 3);
9839 if (is_double) {
9840 TCGv_i64 tcg_rn = tcg_temp_new_i64();
9841 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9842 int pass;
9844 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9845 read_vec_element(s, tcg_rn, rn, pass, MO_64);
9846 read_vec_element(s, tcg_rd, rd, pass, MO_64);
9848 if (is_u) { /* USQADD */
9849 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9850 } else { /* SUQADD */
9851 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9853 write_vec_element(s, tcg_rd, rd, pass, MO_64);
9855 tcg_temp_free_i64(tcg_rd);
9856 tcg_temp_free_i64(tcg_rn);
9857 clear_vec_high(s, !is_scalar, rd);
9858 } else {
9859 TCGv_i32 tcg_rn = tcg_temp_new_i32();
9860 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9861 int pass, maxpasses;
9863 if (is_scalar) {
9864 maxpasses = 1;
9865 } else {
9866 maxpasses = is_q ? 4 : 2;
9869 for (pass = 0; pass < maxpasses; pass++) {
9870 if (is_scalar) {
9871 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9872 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9873 } else {
9874 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9875 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9878 if (is_u) { /* USQADD */
9879 switch (size) {
9880 case 0:
9881 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9882 break;
9883 case 1:
9884 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9885 break;
9886 case 2:
9887 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9888 break;
9889 default:
9890 g_assert_not_reached();
9892 } else { /* SUQADD */
9893 switch (size) {
9894 case 0:
9895 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9896 break;
9897 case 1:
9898 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9899 break;
9900 case 2:
9901 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9902 break;
9903 default:
9904 g_assert_not_reached();
9908 if (is_scalar) {
9909 TCGv_i64 tcg_zero = tcg_const_i64(0);
9910 write_vec_element(s, tcg_zero, rd, 0, MO_64);
9911 tcg_temp_free_i64(tcg_zero);
9913 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9915 tcg_temp_free_i32(tcg_rd);
9916 tcg_temp_free_i32(tcg_rn);
9917 clear_vec_high(s, is_q, rd);
9921 /* AdvSIMD scalar two reg misc
9922 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
9923 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9924 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
9925 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9927 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9929 int rd = extract32(insn, 0, 5);
9930 int rn = extract32(insn, 5, 5);
9931 int opcode = extract32(insn, 12, 5);
9932 int size = extract32(insn, 22, 2);
9933 bool u = extract32(insn, 29, 1);
9934 bool is_fcvt = false;
9935 int rmode;
9936 TCGv_i32 tcg_rmode;
9937 TCGv_ptr tcg_fpstatus;
9939 switch (opcode) {
9940 case 0x3: /* USQADD / SUQADD */
9941 if (!fp_access_check(s)) {
9942 return;
9944 handle_2misc_satacc(s, true, u, false, size, rn, rd);
9945 return;
9946 case 0x7: /* SQABS / SQNEG */
9947 break;
9948 case 0xa: /* CMLT */
9949 if (u) {
9950 unallocated_encoding(s);
9951 return;
9953 /* fall through */
9954 case 0x8: /* CMGT, CMGE */
9955 case 0x9: /* CMEQ, CMLE */
9956 case 0xb: /* ABS, NEG */
9957 if (size != 3) {
9958 unallocated_encoding(s);
9959 return;
9961 break;
9962 case 0x12: /* SQXTUN */
9963 if (!u) {
9964 unallocated_encoding(s);
9965 return;
9967 /* fall through */
9968 case 0x14: /* SQXTN, UQXTN */
9969 if (size == 3) {
9970 unallocated_encoding(s);
9971 return;
9973 if (!fp_access_check(s)) {
9974 return;
9976 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9977 return;
9978 case 0xc ... 0xf:
9979 case 0x16 ... 0x1d:
9980 case 0x1f:
9981 /* Floating point: U, size[1] and opcode indicate operation;
9982 * size[0] indicates single or double precision.
9984 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9985 size = extract32(size, 0, 1) ? 3 : 2;
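/* For example FCVTZS is opcode 0x1b with size<1> == 1 and U == 0,
 * giving 0x3b below; size<0> then selects double (3) or single (2).
 */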
9986 switch (opcode) {
9987 case 0x2c: /* FCMGT (zero) */
9988 case 0x2d: /* FCMEQ (zero) */
9989 case 0x2e: /* FCMLT (zero) */
9990 case 0x6c: /* FCMGE (zero) */
9991 case 0x6d: /* FCMLE (zero) */
9992 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9993 return;
9994 case 0x1d: /* SCVTF */
9995 case 0x5d: /* UCVTF */
9997 bool is_signed = (opcode == 0x1d);
9998 if (!fp_access_check(s)) {
9999 return;
10001 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10002 return;
10004 case 0x3d: /* FRECPE */
10005 case 0x3f: /* FRECPX */
10006 case 0x7d: /* FRSQRTE */
10007 if (!fp_access_check(s)) {
10008 return;
10010 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10011 return;
10012 case 0x1a: /* FCVTNS */
10013 case 0x1b: /* FCVTMS */
10014 case 0x3a: /* FCVTPS */
10015 case 0x3b: /* FCVTZS */
10016 case 0x5a: /* FCVTNU */
10017 case 0x5b: /* FCVTMU */
10018 case 0x7a: /* FCVTPU */
10019 case 0x7b: /* FCVTZU */
10020 is_fcvt = true;
10021 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
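/* The two bits map the N/P/M/Z variants onto FPROUNDING_TIEEVEN,
 * POSINF, NEGINF and ZERO respectively: e.g. FCVTZS (0x3b) has
 * bits 5 and 0 both set and selects FPROUNDING_ZERO.
 */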
10022 break;
10023 case 0x1c: /* FCVTAS */
10024 case 0x5c: /* FCVTAU */
10025 /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10026 is_fcvt = true;
10027 rmode = FPROUNDING_TIEAWAY;
10028 break;
10029 case 0x56: /* FCVTXN, FCVTXN2 */
10030 if (size == 2) {
10031 unallocated_encoding(s);
10032 return;
10034 if (!fp_access_check(s)) {
10035 return;
10037 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10038 return;
10039 default:
10040 unallocated_encoding(s);
10041 return;
10043 break;
10044 default:
10045 unallocated_encoding(s);
10046 return;
10049 if (!fp_access_check(s)) {
10050 return;
10053 if (is_fcvt) {
10054 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10055 tcg_fpstatus = get_fpstatus_ptr(false);
10056 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
10057 } else {
10058 tcg_rmode = NULL;
10059 tcg_fpstatus = NULL;
10062 if (size == 3) {
10063 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10064 TCGv_i64 tcg_rd = tcg_temp_new_i64();
10066 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10067 write_fp_dreg(s, rd, tcg_rd);
10068 tcg_temp_free_i64(tcg_rd);
10069 tcg_temp_free_i64(tcg_rn);
10070 } else {
10071 TCGv_i32 tcg_rn = tcg_temp_new_i32();
10072 TCGv_i32 tcg_rd = tcg_temp_new_i32();
10074 read_vec_element_i32(s, tcg_rn, rn, 0, size);
10076 switch (opcode) {
10077 case 0x7: /* SQABS, SQNEG */
10079 NeonGenOneOpEnvFn *genfn;
10080 static NeonGenOneOpEnvFn * const fns[3][2] = {
10081 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10082 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10083 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10085 genfn = fns[size][u];
10086 genfn(tcg_rd, cpu_env, tcg_rn);
10087 break;
10089 case 0x1a: /* FCVTNS */
10090 case 0x1b: /* FCVTMS */
10091 case 0x1c: /* FCVTAS */
10092 case 0x3a: /* FCVTPS */
10093 case 0x3b: /* FCVTZS */
10095 TCGv_i32 tcg_shift = tcg_const_i32(0);
10096 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
10097 tcg_temp_free_i32(tcg_shift);
10098 break;
10100 case 0x5a: /* FCVTNU */
10101 case 0x5b: /* FCVTMU */
10102 case 0x5c: /* FCVTAU */
10103 case 0x7a: /* FCVTPU */
10104 case 0x7b: /* FCVTZU */
10106 TCGv_i32 tcg_shift = tcg_const_i32(0);
10107 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
10108 tcg_temp_free_i32(tcg_shift);
10109 break;
10111 default:
10112 g_assert_not_reached();
10115 write_fp_sreg(s, rd, tcg_rd);
10116 tcg_temp_free_i32(tcg_rd);
10117 tcg_temp_free_i32(tcg_rn);
10120 if (is_fcvt) {
10121 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
10122 tcg_temp_free_i32(tcg_rmode);
10123 tcg_temp_free_ptr(tcg_fpstatus);
10127 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10128 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10129 int immh, int immb, int opcode, int rn, int rd)
10131 int size = 32 - clz32(immh) - 1;
10132 int immhb = immh << 3 | immb;
10133 int shift = 2 * (8 << size) - immhb;
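/* For example immh:immb == 50 (0b0110010) decodes as 32-bit elements
 * shifted right by 64 - 50 = 14; the encodable shift range is 1..esize.
 */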
10134 GVecGen2iFn *gvec_fn;
10136 if (extract32(immh, 3, 1) && !is_q) {
10137 unallocated_encoding(s);
10138 return;
10140 tcg_debug_assert(size <= 3);
10142 if (!fp_access_check(s)) {
10143 return;
10146 switch (opcode) {
10147 case 0x02: /* SSRA / USRA (accumulate) */
10148 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10149 break;
10151 case 0x08: /* SRI */
10152 gvec_fn = gen_gvec_sri;
10153 break;
10155 case 0x00: /* SSHR / USHR */
10156 if (is_u) {
10157 if (shift == 8 << size) {
10158 /* Shift count the same size as element size produces zero. */
10159 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10160 is_q ? 16 : 8, vec_full_reg_size(s), 0);
10161 return;
10163 gvec_fn = tcg_gen_gvec_shri;
10164 } else {
10165 /* Shift count the same size as element size produces all sign. */
10166 if (shift == 8 << size) {
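/* tcg immediate shifts must be strictly less than the element width,
 * and an arithmetic right shift by esize - 1 already fills the element
 * with copies of the sign bit, so clamping the count does not change
 * the result.
 */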
10167 shift -= 1;
10169 gvec_fn = tcg_gen_gvec_sari;
10171 break;
10173 case 0x04: /* SRSHR / URSHR (rounding) */
10174 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10175 break;
10177 case 0x06: /* SRSRA / URSRA (accum + rounding) */
10178 gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10179 break;
10181 default:
10182 g_assert_not_reached();
10185 gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10188 /* SHL/SLI - Vector shift left */
10189 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10190 int immh, int immb, int opcode, int rn, int rd)
10192 int size = 32 - clz32(immh) - 1;
10193 int immhb = immh << 3 | immb;
10194 int shift = immhb - (8 << size);
10196 /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10197 assert(size >= 0 && size <= 3);
10199 if (extract32(immh, 3, 1) && !is_q) {
10200 unallocated_encoding(s);
10201 return;
10204 if (!fp_access_check(s)) {
10205 return;
10208 if (insert) {
10209 gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10210 } else {
10211 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10215 /* USHLL/SHLL - Vector shift left with widening */
10216 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10217 int immh, int immb, int opcode, int rn, int rd)
10219 int size = 32 - clz32(immh) - 1;
10220 int immhb = immh << 3 | immb;
10221 int shift = immhb - (8 << size);
10222 int dsize = 64;
10223 int esize = 8 << size;
10224 int elements = dsize/esize;
10225 TCGv_i64 tcg_rn = new_tmp_a64(s);
10226 TCGv_i64 tcg_rd = new_tmp_a64(s);
10227 int i;
10229 if (size >= 3) {
10230 unallocated_encoding(s);
10231 return;
10234 if (!fp_access_check(s)) {
10235 return;
10238 /* For the LL variants the store is larger than the load,
10239 * so if rd == rn we would overwrite parts of our input.
10240 * So load everything right now and use shifts in the main loop.
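/* For example SSHLL of byte elements (size 0) gets all eight source
 * bytes from this one 64-bit load; each iteration below extracts byte i
 * by shifting right by i * 8, sign-extends it, applies the immediate
 * left shift and writes a 16-bit result element.
 */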
10242 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10244 for (i = 0; i < elements; i++) {
10245 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10246 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10247 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10248 write_vec_element(s, tcg_rd, rd, i, size + 1);
10252 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10253 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10254 int immh, int immb, int opcode, int rn, int rd)
10256 int immhb = immh << 3 | immb;
10257 int size = 32 - clz32(immh) - 1;
10258 int dsize = 64;
10259 int esize = 8 << size;
10260 int elements = dsize/esize;
10261 int shift = (2 * esize) - immhb;
10262 bool round = extract32(opcode, 0, 1);
10263 TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10264 TCGv_i64 tcg_round;
10265 int i;
10267 if (extract32(immh, 3, 1)) {
10268 unallocated_encoding(s);
10269 return;
10272 if (!fp_access_check(s)) {
10273 return;
10276 tcg_rn = tcg_temp_new_i64();
10277 tcg_rd = tcg_temp_new_i64();
10278 tcg_final = tcg_temp_new_i64();
10279 read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10281 if (round) {
10282 uint64_t round_const = 1ULL << (shift - 1);
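/* i.e. add half of 2^shift before shifting: a rounding right shift
 * by 4 adds 8 first.
 */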
10283 tcg_round = tcg_const_i64(round_const);
10284 } else {
10285 tcg_round = NULL;
10288 for (i = 0; i < elements; i++) {
10289 read_vec_element(s, tcg_rn, rn, i, size+1);
10290 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10291 false, true, size+1, shift);
10293 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10296 if (!is_q) {
10297 write_vec_element(s, tcg_final, rd, 0, MO_64);
10298 } else {
10299 write_vec_element(s, tcg_final, rd, 1, MO_64);
10301 if (round) {
10302 tcg_temp_free_i64(tcg_round);
10304 tcg_temp_free_i64(tcg_rn);
10305 tcg_temp_free_i64(tcg_rd);
10306 tcg_temp_free_i64(tcg_final);
10308 clear_vec_high(s, is_q, rd);
10312 /* AdvSIMD shift by immediate
10313 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
10314 * +---+---+---+-------------+------+------+--------+---+------+------+
10315 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
10316 * +---+---+---+-------------+------+------+--------+---+------+------+
10318 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10320 int rd = extract32(insn, 0, 5);
10321 int rn = extract32(insn, 5, 5);
10322 int opcode = extract32(insn, 11, 5);
10323 int immb = extract32(insn, 16, 3);
10324 int immh = extract32(insn, 19, 4);
10325 bool is_u = extract32(insn, 29, 1);
10326 bool is_q = extract32(insn, 30, 1);
10328 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10329 assert(immh != 0);
10331 switch (opcode) {
10332 case 0x08: /* SRI */
10333 if (!is_u) {
10334 unallocated_encoding(s);
10335 return;
10337 /* fall through */
10338 case 0x00: /* SSHR / USHR */
10339 case 0x02: /* SSRA / USRA (accumulate) */
10340 case 0x04: /* SRSHR / URSHR (rounding) */
10341 case 0x06: /* SRSRA / URSRA (accum + rounding) */
10342 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10343 break;
10344 case 0x0a: /* SHL / SLI */
10345 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10346 break;
10347 case 0x10: /* SHRN */
10348 case 0x11: /* RSHRN / SQRSHRUN */
10349 if (is_u) {
10350 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10351 opcode, rn, rd);
10352 } else {
10353 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10355 break;
10356 case 0x12: /* SQSHRN / UQSHRN */
10357 case 0x13: /* SQRSHRN / UQRSHRN */
10358 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10359 opcode, rn, rd);
10360 break;
10361 case 0x14: /* SSHLL / USHLL */
10362 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10363 break;
10364 case 0x1c: /* SCVTF / UCVTF */
10365 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10366 opcode, rn, rd);
10367 break;
10368 case 0xc: /* SQSHLU */
10369 if (!is_u) {
10370 unallocated_encoding(s);
10371 return;
10373 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10374 break;
10375 case 0xe: /* SQSHL, UQSHL */
10376 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10377 break;
10378 case 0x1f: /* FCVTZS/ FCVTZU */
10379 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10380 return;
10381 default:
10382 unallocated_encoding(s);
10383 return;
10387 /* Generate code to do a "long" addition or subtraction, i.e. one done in
10388 * TCGv_i64 on vector lanes twice the width specified by size.
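/* For example size == 1 selects the addl_u32/subl_u32 helpers, which
 * treat each TCGv_i64 as two independent 32-bit lanes; size == 2 is a
 * plain 64-bit add or subtract.
 */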
10390 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10391 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10393 static NeonGenTwo64OpFn * const fns[3][2] = {
10394 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10395 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10396 { tcg_gen_add_i64, tcg_gen_sub_i64 },
10398 NeonGenTwo64OpFn *genfn;
10399 assert(size < 3);
10401 genfn = fns[size][is_sub];
10402 genfn(tcg_res, tcg_op1, tcg_op2);
10405 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10406 int opcode, int rd, int rn, int rm)
10408 /* 3-reg-different widening insns: 64 x 64 -> 128 */
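/*
 * For example SMLAL Vd.4S, Vn.4H, Vm.4H multiplies 16 bit elements from
 * the low halves of Vn and Vm and accumulates 32 bit products into Vd;
 * the "2" forms (SMLAL2 etc.) take their inputs from the high halves,
 * which is what the is_q/elt selection below implements.
 */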
10409 TCGv_i64 tcg_res[2];
10410 int pass, accop;
10412 tcg_res[0] = tcg_temp_new_i64();
10413 tcg_res[1] = tcg_temp_new_i64();
10415 /* Does this op do an adding accumulate, a subtracting accumulate,
10416 * or no accumulate at all?
10418 switch (opcode) {
10419 case 5:
10420 case 8:
10421 case 9:
10422 accop = 1;
10423 break;
10424 case 10:
10425 case 11:
10426 accop = -1;
10427 break;
10428 default:
10429 accop = 0;
10430 break;
10433 if (accop != 0) {
10434 read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10435 read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10438 /* size == 2 means two 32x32->64 operations; this is worth special
10439 * casing because we can generally handle it inline.
10441 if (size == 2) {
10442 for (pass = 0; pass < 2; pass++) {
10443 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10444 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10445 TCGv_i64 tcg_passres;
10446 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10448 int elt = pass + is_q * 2;
10450 read_vec_element(s, tcg_op1, rn, elt, memop);
10451 read_vec_element(s, tcg_op2, rm, elt, memop);
10453 if (accop == 0) {
10454 tcg_passres = tcg_res[pass];
10455 } else {
10456 tcg_passres = tcg_temp_new_i64();
10459 switch (opcode) {
10460 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10461 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10462 break;
10463 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10464 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10465 break;
10466 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10467 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10469 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10470 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10472 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10473 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10474 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10475 tcg_passres,
10476 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10477 tcg_temp_free_i64(tcg_tmp1);
10478 tcg_temp_free_i64(tcg_tmp2);
10479 break;
10481 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10482 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10483 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10484 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10485 break;
10486 case 9: /* SQDMLAL, SQDMLAL2 */
10487 case 11: /* SQDMLSL, SQDMLSL2 */
10488 case 13: /* SQDMULL, SQDMULL2 */
10489 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10490 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10491 tcg_passres, tcg_passres);
10492 break;
10493 default:
10494 g_assert_not_reached();
10497 if (opcode == 9 || opcode == 11) {
10498 /* saturating accumulate ops */
10499 if (accop < 0) {
10500 tcg_gen_neg_i64(tcg_passres, tcg_passres);
10502 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10503 tcg_res[pass], tcg_passres);
10504 } else if (accop > 0) {
10505 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10506 } else if (accop < 0) {
10507 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10510 if (accop != 0) {
10511 tcg_temp_free_i64(tcg_passres);
10514 tcg_temp_free_i64(tcg_op1);
10515 tcg_temp_free_i64(tcg_op2);
10517 } else {
10518 /* size 0 or 1, generally helper functions */
10519 for (pass = 0; pass < 2; pass++) {
10520 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10521 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10522 TCGv_i64 tcg_passres;
10523 int elt = pass + is_q * 2;
10525 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10526 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10528 if (accop == 0) {
10529 tcg_passres = tcg_res[pass];
10530 } else {
10531 tcg_passres = tcg_temp_new_i64();
10534 switch (opcode) {
10535 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10536 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10538 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10539 static NeonGenWidenFn * const widenfns[2][2] = {
10540 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10541 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10543 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10545 widenfn(tcg_op2_64, tcg_op2);
10546 widenfn(tcg_passres, tcg_op1);
10547 gen_neon_addl(size, (opcode == 2), tcg_passres,
10548 tcg_passres, tcg_op2_64);
10549 tcg_temp_free_i64(tcg_op2_64);
10550 break;
10552 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10553 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10554 if (size == 0) {
10555 if (is_u) {
10556 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10557 } else {
10558 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10560 } else {
10561 if (is_u) {
10562 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10563 } else {
10564 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10567 break;
10568 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10569 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10570 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10571 if (size == 0) {
10572 if (is_u) {
10573 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10574 } else {
10575 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10577 } else {
10578 if (is_u) {
10579 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10580 } else {
10581 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10584 break;
10585 case 9: /* SQDMLAL, SQDMLAL2 */
10586 case 11: /* SQDMLSL, SQDMLSL2 */
10587 case 13: /* SQDMULL, SQDMULL2 */
10588 assert(size == 1);
10589 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10590 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10591 tcg_passres, tcg_passres);
10592 break;
10593 default:
10594 g_assert_not_reached();
10596 tcg_temp_free_i32(tcg_op1);
10597 tcg_temp_free_i32(tcg_op2);
10599 if (accop != 0) {
10600 if (opcode == 9 || opcode == 11) {
10601 /* saturating accumulate ops */
10602 if (accop < 0) {
10603 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10605 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10606 tcg_res[pass],
10607 tcg_passres);
10608 } else {
10609 gen_neon_addl(size, (accop < 0), tcg_res[pass],
10610 tcg_res[pass], tcg_passres);
10612 tcg_temp_free_i64(tcg_passres);
10617 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10618 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10619 tcg_temp_free_i64(tcg_res[0]);
10620 tcg_temp_free_i64(tcg_res[1]);
10623 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10624 int opcode, int rd, int rn, int rm)
10626 TCGv_i64 tcg_res[2];
10627 int part = is_q ? 2 : 0;
10628 int pass;
10630 for (pass = 0; pass < 2; pass++) {
10631 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10632 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10633 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10634 static NeonGenWidenFn * const widenfns[3][2] = {
10635 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10636 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10637 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10639 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10641 read_vec_element(s, tcg_op1, rn, pass, MO_64);
10642 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10643 widenfn(tcg_op2_wide, tcg_op2);
10644 tcg_temp_free_i32(tcg_op2);
10645 tcg_res[pass] = tcg_temp_new_i64();
10646 gen_neon_addl(size, (opcode == 3),
10647 tcg_res[pass], tcg_op1, tcg_op2_wide);
10648 tcg_temp_free_i64(tcg_op1);
10649 tcg_temp_free_i64(tcg_op2_wide);
10652 for (pass = 0; pass < 2; pass++) {
10653 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10654 tcg_temp_free_i64(tcg_res[pass]);
10658 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10660 tcg_gen_addi_i64(in, in, 1U << 31);
10661 tcg_gen_extrh_i64_i32(res, in);
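/*
 * The narrowing ops handled below (ADDHN, SUBHN, RADDHN, RSUBHN) keep the
 * high half of the double-width sum or difference; the rounding variants
 * first add 1 << (esize - 1), with esize the destination element size,
 * which is what the helper above does for 32 bit destination elements.
 */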
10664 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10665 int opcode, int rd, int rn, int rm)
10667 TCGv_i32 tcg_res[2];
10668 int part = is_q ? 2 : 0;
10669 int pass;
10671 for (pass = 0; pass < 2; pass++) {
10672 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10673 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10674 TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10675 static NeonGenNarrowFn * const narrowfns[3][2] = {
10676 { gen_helper_neon_narrow_high_u8,
10677 gen_helper_neon_narrow_round_high_u8 },
10678 { gen_helper_neon_narrow_high_u16,
10679 gen_helper_neon_narrow_round_high_u16 },
10680 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10682 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10684 read_vec_element(s, tcg_op1, rn, pass, MO_64);
10685 read_vec_element(s, tcg_op2, rm, pass, MO_64);
10687 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10689 tcg_temp_free_i64(tcg_op1);
10690 tcg_temp_free_i64(tcg_op2);
10692 tcg_res[pass] = tcg_temp_new_i32();
10693 gennarrow(tcg_res[pass], tcg_wideres);
10694 tcg_temp_free_i64(tcg_wideres);
10697 for (pass = 0; pass < 2; pass++) {
10698 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10699 tcg_temp_free_i32(tcg_res[pass]);
10701 clear_vec_high(s, is_q, rd);
10704 /* AdvSIMD three different
10705 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
10706 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10707 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
10708 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10710 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10712 /* Instructions in this group fall into three basic classes
10713 * (in each case with the operation working on each element in
10714 * the input vectors):
10715 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10716 * 128 bit input)
10717 * (2) wide 64 x 128 -> 128
10718 * (3) narrowing 128 x 128 -> 64
10719 * Here we do initial decode, catch unallocated cases and
10720 * dispatch to separate functions for each class.
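/*
 * For example SADDL Vd.8H, Vn.8B, Vm.8B is class (1), SADDW Vd.8H, Vn.8H,
 * Vm.8B is class (2) and ADDHN Vd.8B, Vn.8H, Vm.8H is class (3).
 */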
10722 int is_q = extract32(insn, 30, 1);
10723 int is_u = extract32(insn, 29, 1);
10724 int size = extract32(insn, 22, 2);
10725 int opcode = extract32(insn, 12, 4);
10726 int rm = extract32(insn, 16, 5);
10727 int rn = extract32(insn, 5, 5);
10728 int rd = extract32(insn, 0, 5);
10730 switch (opcode) {
10731 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10732 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10733 /* 64 x 128 -> 128 */
10734 if (size == 3) {
10735 unallocated_encoding(s);
10736 return;
10738 if (!fp_access_check(s)) {
10739 return;
10741 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10742 break;
10743 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10744 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10745 /* 128 x 128 -> 64 */
10746 if (size == 3) {
10747 unallocated_encoding(s);
10748 return;
10750 if (!fp_access_check(s)) {
10751 return;
10753 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10754 break;
10755 case 14: /* PMULL, PMULL2 */
10756 if (is_u) {
10757 unallocated_encoding(s);
10758 return;
10760 switch (size) {
10761 case 0: /* PMULL.P8 */
10762 if (!fp_access_check(s)) {
10763 return;
10765 /* The Q field specifies lo/hi half input for this insn. */
10766 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10767 gen_helper_neon_pmull_h);
10768 break;
10770 case 3: /* PMULL.P64 */
10771 if (!dc_isar_feature(aa64_pmull, s)) {
10772 unallocated_encoding(s);
10773 return;
10775 if (!fp_access_check(s)) {
10776 return;
10778 /* The Q field specifies lo/hi half input for this insn. */
10779 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10780 gen_helper_gvec_pmull_q);
10781 break;
10783 default:
10784 unallocated_encoding(s);
10785 break;
10787 return;
10788 case 9: /* SQDMLAL, SQDMLAL2 */
10789 case 11: /* SQDMLSL, SQDMLSL2 */
10790 case 13: /* SQDMULL, SQDMULL2 */
10791 if (is_u || size == 0) {
10792 unallocated_encoding(s);
10793 return;
10795 /* fall through */
10796 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10797 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10798 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10799 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10800 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10801 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10802 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10803 /* 64 x 64 -> 128 */
10804 if (size == 3) {
10805 unallocated_encoding(s);
10806 return;
10808 if (!fp_access_check(s)) {
10809 return;
10812 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10813 break;
10814 default:
10815 /* opcode 15 not allocated */
10816 unallocated_encoding(s);
10817 break;
10821 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10822 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10824 int rd = extract32(insn, 0, 5);
10825 int rn = extract32(insn, 5, 5);
10826 int rm = extract32(insn, 16, 5);
10827 int size = extract32(insn, 22, 2);
10828 bool is_u = extract32(insn, 29, 1);
10829 bool is_q = extract32(insn, 30, 1);
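/*
 * U:size (i.e. size + 4 * is_u below) forms a 3 bit index: 0..4 are the
 * plain AND/BIC/ORR/ORN/EOR gvec expansions, while 5..7 are BSL/BIT/BIF,
 * which all map onto gvec bitsel and differ only in operand order
 * (e.g. BIT ends up computing rd = (rn & rm) | (rd & ~rm), so rm selects
 * which bits come from rn).
 */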
10831 if (!fp_access_check(s)) {
10832 return;
10835 switch (size + 4 * is_u) {
10836 case 0: /* AND */
10837 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10838 return;
10839 case 1: /* BIC */
10840 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10841 return;
10842 case 2: /* ORR */
10843 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10844 return;
10845 case 3: /* ORN */
10846 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10847 return;
10848 case 4: /* EOR */
10849 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10850 return;
10852 case 5: /* BSL bitwise select */
10853 gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10854 return;
10855 case 6: /* BIT, bitwise insert if true */
10856 gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10857 return;
10858 case 7: /* BIF, bitwise insert if false */
10859 gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10860 return;
10862 default:
10863 g_assert_not_reached();
10867 /* Pairwise op subgroup of C3.6.16.
10869 * This is called directly or via the handle_3same_float for float pairwise
10870 * operations where the opcode and size are calculated differently.
10872 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10873 int size, int rn, int rm, int rd)
10875 TCGv_ptr fpst;
10876 int pass;
10878 /* Floating point operations need fpst */
10879 if (opcode >= 0x58) {
10880 fpst = get_fpstatus_ptr(false);
10881 } else {
10882 fpst = NULL;
10885 if (!fp_access_check(s)) {
10886 return;
10889 /* These operations work on the concatenated rm:rn, with each pair of
10890 * adjacent elements being operated on to produce an element in the result.
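/*
 * For example ADDP Vd.4S, Vn.4S, Vm.4S gives
 *   Vd[0] = Vn[0] + Vn[1], Vd[1] = Vn[2] + Vn[3],
 *   Vd[2] = Vm[0] + Vm[1], Vd[3] = Vm[2] + Vm[3]
 * which is why each pass below reads both its inputs from either rn or rm.
 */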
10892 if (size == 3) {
10893 TCGv_i64 tcg_res[2];
10895 for (pass = 0; pass < 2; pass++) {
10896 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10897 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10898 int passreg = (pass == 0) ? rn : rm;
10900 read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10901 read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10902 tcg_res[pass] = tcg_temp_new_i64();
10904 switch (opcode) {
10905 case 0x17: /* ADDP */
10906 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10907 break;
10908 case 0x58: /* FMAXNMP */
10909 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10910 break;
10911 case 0x5a: /* FADDP */
10912 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10913 break;
10914 case 0x5e: /* FMAXP */
10915 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10916 break;
10917 case 0x78: /* FMINNMP */
10918 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10919 break;
10920 case 0x7e: /* FMINP */
10921 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10922 break;
10923 default:
10924 g_assert_not_reached();
10927 tcg_temp_free_i64(tcg_op1);
10928 tcg_temp_free_i64(tcg_op2);
10931 for (pass = 0; pass < 2; pass++) {
10932 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10933 tcg_temp_free_i64(tcg_res[pass]);
10935 } else {
10936 int maxpass = is_q ? 4 : 2;
10937 TCGv_i32 tcg_res[4];
10939 for (pass = 0; pass < maxpass; pass++) {
10940 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10941 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10942 NeonGenTwoOpFn *genfn = NULL;
10943 int passreg = pass < (maxpass / 2) ? rn : rm;
10944 int passelt = (is_q && (pass & 1)) ? 2 : 0;
10946 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10947 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10948 tcg_res[pass] = tcg_temp_new_i32();
10950 switch (opcode) {
10951 case 0x17: /* ADDP */
10953 static NeonGenTwoOpFn * const fns[3] = {
10954 gen_helper_neon_padd_u8,
10955 gen_helper_neon_padd_u16,
10956 tcg_gen_add_i32,
10958 genfn = fns[size];
10959 break;
10961 case 0x14: /* SMAXP, UMAXP */
10963 static NeonGenTwoOpFn * const fns[3][2] = {
10964 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10965 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10966 { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10968 genfn = fns[size][u];
10969 break;
10971 case 0x15: /* SMINP, UMINP */
10973 static NeonGenTwoOpFn * const fns[3][2] = {
10974 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10975 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10976 { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10978 genfn = fns[size][u];
10979 break;
10981 /* The FP operations are all on single floats (32 bit) */
10982 case 0x58: /* FMAXNMP */
10983 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10984 break;
10985 case 0x5a: /* FADDP */
10986 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10987 break;
10988 case 0x5e: /* FMAXP */
10989 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10990 break;
10991 case 0x78: /* FMINNMP */
10992 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10993 break;
10994 case 0x7e: /* FMINP */
10995 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10996 break;
10997 default:
10998 g_assert_not_reached();
11001 /* FP ops called directly, otherwise call now */
11002 if (genfn) {
11003 genfn(tcg_res[pass], tcg_op1, tcg_op2);
11006 tcg_temp_free_i32(tcg_op1);
11007 tcg_temp_free_i32(tcg_op2);
11010 for (pass = 0; pass < maxpass; pass++) {
11011 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11012 tcg_temp_free_i32(tcg_res[pass]);
11014 clear_vec_high(s, is_q, rd);
11017 if (fpst) {
11018 tcg_temp_free_ptr(fpst);
11022 /* Floating point op subgroup of C3.6.16. */
11023 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
11025 /* For floating point ops, the U, size[1] and opcode bits
11026 * together indicate the operation. size[0] indicates single
11027 * or double.
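/*
 * fpopcode below is opcode | (size[1] << 5) | (U << 6); for example FADD
 * (U=0, size=0x, opcode 0x1a) decodes to 0x1a while FSUB (U=0, size=1x,
 * opcode 0x1a) decodes to 0x3a, matching the case labels.
 */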
11029 int fpopcode = extract32(insn, 11, 5)
11030 | (extract32(insn, 23, 1) << 5)
11031 | (extract32(insn, 29, 1) << 6);
11032 int is_q = extract32(insn, 30, 1);
11033 int size = extract32(insn, 22, 1);
11034 int rm = extract32(insn, 16, 5);
11035 int rn = extract32(insn, 5, 5);
11036 int rd = extract32(insn, 0, 5);
11038 int datasize = is_q ? 128 : 64;
11039 int esize = 32 << size;
11040 int elements = datasize / esize;
11042 if (size == 1 && !is_q) {
11043 unallocated_encoding(s);
11044 return;
11047 switch (fpopcode) {
11048 case 0x58: /* FMAXNMP */
11049 case 0x5a: /* FADDP */
11050 case 0x5e: /* FMAXP */
11051 case 0x78: /* FMINNMP */
11052 case 0x7e: /* FMINP */
11053 if (size && !is_q) {
11054 unallocated_encoding(s);
11055 return;
11057 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11058 rn, rm, rd);
11059 return;
11060 case 0x1b: /* FMULX */
11061 case 0x1f: /* FRECPS */
11062 case 0x3f: /* FRSQRTS */
11063 case 0x5d: /* FACGE */
11064 case 0x7d: /* FACGT */
11065 case 0x19: /* FMLA */
11066 case 0x39: /* FMLS */
11067 case 0x18: /* FMAXNM */
11068 case 0x1a: /* FADD */
11069 case 0x1c: /* FCMEQ */
11070 case 0x1e: /* FMAX */
11071 case 0x38: /* FMINNM */
11072 case 0x3a: /* FSUB */
11073 case 0x3e: /* FMIN */
11074 case 0x5b: /* FMUL */
11075 case 0x5c: /* FCMGE */
11076 case 0x5f: /* FDIV */
11077 case 0x7a: /* FABD */
11078 case 0x7c: /* FCMGT */
11079 if (!fp_access_check(s)) {
11080 return;
11082 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11083 return;
11085 case 0x1d: /* FMLAL */
11086 case 0x3d: /* FMLSL */
11087 case 0x59: /* FMLAL2 */
11088 case 0x79: /* FMLSL2 */
11089 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11090 unallocated_encoding(s);
11091 return;
11093 if (fp_access_check(s)) {
11094 int is_s = extract32(insn, 23, 1);
11095 int is_2 = extract32(insn, 29, 1);
11096 int data = (is_2 << 1) | is_s;
11097 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11098 vec_full_reg_offset(s, rn),
11099 vec_full_reg_offset(s, rm), cpu_env,
11100 is_q ? 16 : 8, vec_full_reg_size(s),
11101 data, gen_helper_gvec_fmlal_a64);
11103 return;
11105 default:
11106 unallocated_encoding(s);
11107 return;
11111 /* Integer op subgroup of C3.6.16. */
11112 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11114 int is_q = extract32(insn, 30, 1);
11115 int u = extract32(insn, 29, 1);
11116 int size = extract32(insn, 22, 2);
11117 int opcode = extract32(insn, 11, 5);
11118 int rm = extract32(insn, 16, 5);
11119 int rn = extract32(insn, 5, 5);
11120 int rd = extract32(insn, 0, 5);
11121 int pass;
11122 TCGCond cond;
11124 switch (opcode) {
11125 case 0x13: /* MUL, PMUL */
11126 if (u && size != 0) {
11127 unallocated_encoding(s);
11128 return;
11130 /* fall through */
11131 case 0x0: /* SHADD, UHADD */
11132 case 0x2: /* SRHADD, URHADD */
11133 case 0x4: /* SHSUB, UHSUB */
11134 case 0xc: /* SMAX, UMAX */
11135 case 0xd: /* SMIN, UMIN */
11136 case 0xe: /* SABD, UABD */
11137 case 0xf: /* SABA, UABA */
11138 case 0x12: /* MLA, MLS */
11139 if (size == 3) {
11140 unallocated_encoding(s);
11141 return;
11143 break;
11144 case 0x16: /* SQDMULH, SQRDMULH */
11145 if (size == 0 || size == 3) {
11146 unallocated_encoding(s);
11147 return;
11149 break;
11150 default:
11151 if (size == 3 && !is_q) {
11152 unallocated_encoding(s);
11153 return;
11155 break;
11158 if (!fp_access_check(s)) {
11159 return;
11162 switch (opcode) {
11163 case 0x01: /* SQADD, UQADD */
11164 if (u) {
11165 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11166 } else {
11167 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11169 return;
11170 case 0x05: /* SQSUB, UQSUB */
11171 if (u) {
11172 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11173 } else {
11174 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11176 return;
11177 case 0x08: /* SSHL, USHL */
11178 if (u) {
11179 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11180 } else {
11181 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11183 return;
11184 case 0x0c: /* SMAX, UMAX */
11185 if (u) {
11186 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11187 } else {
11188 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11190 return;
11191 case 0x0d: /* SMIN, UMIN */
11192 if (u) {
11193 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11194 } else {
11195 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11197 return;
11198 case 0xe: /* SABD, UABD */
11199 if (u) {
11200 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11201 } else {
11202 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11204 return;
11205 case 0xf: /* SABA, UABA */
11206 if (u) {
11207 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11208 } else {
11209 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11211 return;
11212 case 0x10: /* ADD, SUB */
11213 if (u) {
11214 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11215 } else {
11216 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11218 return;
11219 case 0x13: /* MUL, PMUL */
11220 if (!u) { /* MUL */
11221 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11222 } else { /* PMUL */
11223 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11225 return;
11226 case 0x12: /* MLA, MLS */
11227 if (u) {
11228 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11229 } else {
11230 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11232 return;
11233 case 0x11:
11234 if (!u) { /* CMTST */
11235 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11236 return;
11238 /* else CMEQ */
11239 cond = TCG_COND_EQ;
11240 goto do_gvec_cmp;
11241 case 0x06: /* CMGT, CMHI */
11242 cond = u ? TCG_COND_GTU : TCG_COND_GT;
11243 goto do_gvec_cmp;
11244 case 0x07: /* CMGE, CMHS */
11245 cond = u ? TCG_COND_GEU : TCG_COND_GE;
11246 do_gvec_cmp:
11247 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11248 vec_full_reg_offset(s, rn),
11249 vec_full_reg_offset(s, rm),
11250 is_q ? 16 : 8, vec_full_reg_size(s));
11251 return;
11254 if (size == 3) {
11255 assert(is_q);
11256 for (pass = 0; pass < 2; pass++) {
11257 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11258 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11259 TCGv_i64 tcg_res = tcg_temp_new_i64();
11261 read_vec_element(s, tcg_op1, rn, pass, MO_64);
11262 read_vec_element(s, tcg_op2, rm, pass, MO_64);
11264 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11266 write_vec_element(s, tcg_res, rd, pass, MO_64);
11268 tcg_temp_free_i64(tcg_res);
11269 tcg_temp_free_i64(tcg_op1);
11270 tcg_temp_free_i64(tcg_op2);
11272 } else {
11273 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11274 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11275 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11276 TCGv_i32 tcg_res = tcg_temp_new_i32();
11277 NeonGenTwoOpFn *genfn = NULL;
11278 NeonGenTwoOpEnvFn *genenvfn = NULL;
11280 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11281 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11283 switch (opcode) {
11284 case 0x0: /* SHADD, UHADD */
11286 static NeonGenTwoOpFn * const fns[3][2] = {
11287 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11288 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11289 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11291 genfn = fns[size][u];
11292 break;
11294 case 0x2: /* SRHADD, URHADD */
11296 static NeonGenTwoOpFn * const fns[3][2] = {
11297 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11298 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11299 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11301 genfn = fns[size][u];
11302 break;
11304 case 0x4: /* SHSUB, UHSUB */
11306 static NeonGenTwoOpFn * const fns[3][2] = {
11307 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11308 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11309 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11311 genfn = fns[size][u];
11312 break;
11314 case 0x9: /* SQSHL, UQSHL */
11316 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11317 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11318 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11319 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11321 genenvfn = fns[size][u];
11322 break;
11324 case 0xa: /* SRSHL, URSHL */
11326 static NeonGenTwoOpFn * const fns[3][2] = {
11327 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11328 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11329 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11331 genfn = fns[size][u];
11332 break;
11334 case 0xb: /* SQRSHL, UQRSHL */
11336 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11337 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11338 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11339 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11341 genenvfn = fns[size][u];
11342 break;
11344 case 0x16: /* SQDMULH, SQRDMULH */
11346 static NeonGenTwoOpEnvFn * const fns[2][2] = {
11347 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
11348 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
11350 assert(size == 1 || size == 2);
11351 genenvfn = fns[size - 1][u];
11352 break;
11354 default:
11355 g_assert_not_reached();
11358 if (genenvfn) {
11359 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11360 } else {
11361 genfn(tcg_res, tcg_op1, tcg_op2);
11364 if (opcode == 0xf) {
11365 /* SABA, UABA: accumulating ops */
11366 static NeonGenTwoOpFn * const fns[3] = {
11367 gen_helper_neon_add_u8,
11368 gen_helper_neon_add_u16,
11369 tcg_gen_add_i32,
11372 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
11373 fns[size](tcg_res, tcg_op1, tcg_res);
11376 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11378 tcg_temp_free_i32(tcg_res);
11379 tcg_temp_free_i32(tcg_op1);
11380 tcg_temp_free_i32(tcg_op2);
11383 clear_vec_high(s, is_q, rd);
11386 /* AdvSIMD three same
11387 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
11388 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11389 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
11390 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11392 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11394 int opcode = extract32(insn, 11, 5);
11396 switch (opcode) {
11397 case 0x3: /* logic ops */
11398 disas_simd_3same_logic(s, insn);
11399 break;
11400 case 0x17: /* ADDP */
11401 case 0x14: /* SMAXP, UMAXP */
11402 case 0x15: /* SMINP, UMINP */
11404 /* Pairwise operations */
11405 int is_q = extract32(insn, 30, 1);
11406 int u = extract32(insn, 29, 1);
11407 int size = extract32(insn, 22, 2);
11408 int rm = extract32(insn, 16, 5);
11409 int rn = extract32(insn, 5, 5);
11410 int rd = extract32(insn, 0, 5);
11411 if (opcode == 0x17) {
11412 if (u || (size == 3 && !is_q)) {
11413 unallocated_encoding(s);
11414 return;
11416 } else {
11417 if (size == 3) {
11418 unallocated_encoding(s);
11419 return;
11422 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11423 break;
11425 case 0x18 ... 0x31:
11426 /* floating point ops, sz[1] and U are part of opcode */
11427 disas_simd_3same_float(s, insn);
11428 break;
11429 default:
11430 disas_simd_3same_int(s, insn);
11431 break;
11436 * Advanced SIMD three same (ARMv8.2 FP16 variants)
11438 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
11439 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11440 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
11441 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11443 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11444 * (register), FACGE, FABD, FCMGT (register) and FACGT.
11447 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11449 int opcode, fpopcode;
11450 int is_q, u, a, rm, rn, rd;
11451 int datasize, elements;
11452 int pass;
11453 TCGv_ptr fpst;
11454 bool pairwise = false;
11456 if (!dc_isar_feature(aa64_fp16, s)) {
11457 unallocated_encoding(s);
11458 return;
11461 if (!fp_access_check(s)) {
11462 return;
11465 /* For these floating point ops, the U, a and opcode bits
11466 * together indicate the operation.
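/*
 * fpopcode is built as opcode | (a << 3) | (u << 4); for example FADD
 * (u=0, a=0, opcode 2) is 0x2 and its pairwise counterpart FADDP
 * (u=1, a=0, opcode 2) is 0x12, matching the case labels below.
 */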
11468 opcode = extract32(insn, 11, 3);
11469 u = extract32(insn, 29, 1);
11470 a = extract32(insn, 23, 1);
11471 is_q = extract32(insn, 30, 1);
11472 rm = extract32(insn, 16, 5);
11473 rn = extract32(insn, 5, 5);
11474 rd = extract32(insn, 0, 5);
11476 fpopcode = opcode | (a << 3) | (u << 4);
11477 datasize = is_q ? 128 : 64;
11478 elements = datasize / 16;
11480 switch (fpopcode) {
11481 case 0x10: /* FMAXNMP */
11482 case 0x12: /* FADDP */
11483 case 0x16: /* FMAXP */
11484 case 0x18: /* FMINNMP */
11485 case 0x1e: /* FMINP */
11486 pairwise = true;
11487 break;
11490 fpst = get_fpstatus_ptr(true);
11492 if (pairwise) {
11493 int maxpass = is_q ? 8 : 4;
11494 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11495 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11496 TCGv_i32 tcg_res[8];
11498 for (pass = 0; pass < maxpass; pass++) {
11499 int passreg = pass < (maxpass / 2) ? rn : rm;
11500 int passelt = (pass << 1) & (maxpass - 1);
11502 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11503 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11504 tcg_res[pass] = tcg_temp_new_i32();
11506 switch (fpopcode) {
11507 case 0x10: /* FMAXNMP */
11508 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11509 fpst);
11510 break;
11511 case 0x12: /* FADDP */
11512 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11513 break;
11514 case 0x16: /* FMAXP */
11515 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11516 break;
11517 case 0x18: /* FMINNMP */
11518 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11519 fpst);
11520 break;
11521 case 0x1e: /* FMINP */
11522 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11523 break;
11524 default:
11525 g_assert_not_reached();
11529 for (pass = 0; pass < maxpass; pass++) {
11530 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11531 tcg_temp_free_i32(tcg_res[pass]);
11534 tcg_temp_free_i32(tcg_op1);
11535 tcg_temp_free_i32(tcg_op2);
11537 } else {
11538 for (pass = 0; pass < elements; pass++) {
11539 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11540 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11541 TCGv_i32 tcg_res = tcg_temp_new_i32();
11543 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11544 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11546 switch (fpopcode) {
11547 case 0x0: /* FMAXNM */
11548 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11549 break;
11550 case 0x1: /* FMLA */
11551 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11552 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11553 fpst);
11554 break;
11555 case 0x2: /* FADD */
11556 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11557 break;
11558 case 0x3: /* FMULX */
11559 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11560 break;
11561 case 0x4: /* FCMEQ */
11562 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11563 break;
11564 case 0x6: /* FMAX */
11565 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11566 break;
11567 case 0x7: /* FRECPS */
11568 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11569 break;
11570 case 0x8: /* FMINNM */
11571 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11572 break;
11573 case 0x9: /* FMLS */
11574 /* As usual for ARM, separate negation for fused multiply-add */
11575 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11576 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11577 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11578 fpst);
11579 break;
11580 case 0xa: /* FSUB */
11581 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11582 break;
11583 case 0xe: /* FMIN */
11584 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11585 break;
11586 case 0xf: /* FRSQRTS */
11587 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11588 break;
11589 case 0x13: /* FMUL */
11590 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11591 break;
11592 case 0x14: /* FCMGE */
11593 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11594 break;
11595 case 0x15: /* FACGE */
11596 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11597 break;
11598 case 0x17: /* FDIV */
11599 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11600 break;
11601 case 0x1a: /* FABD */
11602 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11603 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11604 break;
11605 case 0x1c: /* FCMGT */
11606 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11607 break;
11608 case 0x1d: /* FACGT */
11609 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11610 break;
11611 default:
11612 fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
11613 __func__, insn, fpopcode, s->pc_curr);
11614 g_assert_not_reached();
11617 write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11618 tcg_temp_free_i32(tcg_res);
11619 tcg_temp_free_i32(tcg_op1);
11620 tcg_temp_free_i32(tcg_op2);
11624 tcg_temp_free_ptr(fpst);
11626 clear_vec_high(s, is_q, rd);
11629 /* AdvSIMD three same extra
11630 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
11631 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11632 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
11633 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11635 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11637 int rd = extract32(insn, 0, 5);
11638 int rn = extract32(insn, 5, 5);
11639 int opcode = extract32(insn, 11, 4);
11640 int rm = extract32(insn, 16, 5);
11641 int size = extract32(insn, 22, 2);
11642 bool u = extract32(insn, 29, 1);
11643 bool is_q = extract32(insn, 30, 1);
11644 bool feature;
11645 int rot;
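/*
 * For the FCMLA cases the low two opcode bits select the rotation
 * (0, 90, 180 or 270 degrees); for FCADD opcode bit 1 selects 90 vs 270.
 * Both are extracted into 'rot' further down.
 */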
11647 switch (u * 16 + opcode) {
11648 case 0x10: /* SQRDMLAH (vector) */
11649 case 0x11: /* SQRDMLSH (vector) */
11650 if (size != 1 && size != 2) {
11651 unallocated_encoding(s);
11652 return;
11654 feature = dc_isar_feature(aa64_rdm, s);
11655 break;
11656 case 0x02: /* SDOT (vector) */
11657 case 0x12: /* UDOT (vector) */
11658 if (size != MO_32) {
11659 unallocated_encoding(s);
11660 return;
11662 feature = dc_isar_feature(aa64_dp, s);
11663 break;
11664 case 0x18: /* FCMLA, #0 */
11665 case 0x19: /* FCMLA, #90 */
11666 case 0x1a: /* FCMLA, #180 */
11667 case 0x1b: /* FCMLA, #270 */
11668 case 0x1c: /* FCADD, #90 */
11669 case 0x1e: /* FCADD, #270 */
11670 if (size == 0
11671 || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11672 || (size == 3 && !is_q)) {
11673 unallocated_encoding(s);
11674 return;
11676 feature = dc_isar_feature(aa64_fcma, s);
11677 break;
11678 default:
11679 unallocated_encoding(s);
11680 return;
11682 if (!feature) {
11683 unallocated_encoding(s);
11684 return;
11686 if (!fp_access_check(s)) {
11687 return;
11690 switch (opcode) {
11691 case 0x0: /* SQRDMLAH (vector) */
11692 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11693 return;
11695 case 0x1: /* SQRDMLSH (vector) */
11696 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11697 return;
11699 case 0x2: /* SDOT / UDOT */
11700 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
11701 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11702 return;
11704 case 0x8: /* FCMLA, #0 */
11705 case 0x9: /* FCMLA, #90 */
11706 case 0xa: /* FCMLA, #180 */
11707 case 0xb: /* FCMLA, #270 */
11708 rot = extract32(opcode, 0, 2);
11709 switch (size) {
11710 case 1:
11711 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
11712 gen_helper_gvec_fcmlah);
11713 break;
11714 case 2:
11715 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11716 gen_helper_gvec_fcmlas);
11717 break;
11718 case 3:
11719 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11720 gen_helper_gvec_fcmlad);
11721 break;
11722 default:
11723 g_assert_not_reached();
11725 return;
11727 case 0xc: /* FCADD, #90 */
11728 case 0xe: /* FCADD, #270 */
11729 rot = extract32(opcode, 1, 1);
11730 switch (size) {
11731 case 1:
11732 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11733 gen_helper_gvec_fcaddh);
11734 break;
11735 case 2:
11736 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11737 gen_helper_gvec_fcadds);
11738 break;
11739 case 3:
11740 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11741 gen_helper_gvec_fcaddd);
11742 break;
11743 default:
11744 g_assert_not_reached();
11746 return;
11748 default:
11749 g_assert_not_reached();
11753 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11754 int size, int rn, int rd)
11756 /* Handle 2-reg-misc ops which are widening (so each size element
11757 * in the source becomes a 2*size element in the destination).
11758 * The only instruction like this is FCVTL.
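/*
 * For example FCVTL Vd.4S, Vn.4H converts the low four half precision
 * elements of Vn to single precision, and FCVTL2 (is_q set) converts the
 * high four; FCVTL Vd.2D, Vn.2S is the single-to-double case handled
 * first below.
 */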
11760 int pass;
11762 if (size == 3) {
11763 /* 32 -> 64 bit fp conversion */
11764 TCGv_i64 tcg_res[2];
11765 int srcelt = is_q ? 2 : 0;
11767 for (pass = 0; pass < 2; pass++) {
11768 TCGv_i32 tcg_op = tcg_temp_new_i32();
11769 tcg_res[pass] = tcg_temp_new_i64();
11771 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11772 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11773 tcg_temp_free_i32(tcg_op);
11775 for (pass = 0; pass < 2; pass++) {
11776 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11777 tcg_temp_free_i64(tcg_res[pass]);
11779 } else {
11780 /* 16 -> 32 bit fp conversion */
11781 int srcelt = is_q ? 4 : 0;
11782 TCGv_i32 tcg_res[4];
11783 TCGv_ptr fpst = get_fpstatus_ptr(false);
11784 TCGv_i32 ahp = get_ahp_flag();
11786 for (pass = 0; pass < 4; pass++) {
11787 tcg_res[pass] = tcg_temp_new_i32();
11789 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11790 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11791 fpst, ahp);
11793 for (pass = 0; pass < 4; pass++) {
11794 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11795 tcg_temp_free_i32(tcg_res[pass]);
11798 tcg_temp_free_ptr(fpst);
11799 tcg_temp_free_i32(ahp);
11803 static void handle_rev(DisasContext *s, int opcode, bool u,
11804 bool is_q, int size, int rn, int rd)
11806 int op = (opcode << 1) | u;
11807 int opsz = op + size;
11808 int grp_size = 3 - opsz;
11809 int dsize = is_q ? 128 : 64;
11810 int i;
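/*
 * REV64 is opcode 0/U=0, REV32 is opcode 0/U=1 and REV16 is opcode 1/U=0,
 * so op is 0, 1 or 2; grp_size = 3 - (op + size) is then the log2 byte
 * size of the group within which elements are reversed (e.g. REV32 on
 * byte elements gives grp_size == MO_32).  opsz >= 3 rejects combinations
 * such as REV16 on anything wider than bytes.
 */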
11812 if (opsz >= 3) {
11813 unallocated_encoding(s);
11814 return;
11817 if (!fp_access_check(s)) {
11818 return;
11821 if (size == 0) {
11822 /* Special case bytes, use bswap op on each group of elements */
11823 int groups = dsize / (8 << grp_size);
11825 for (i = 0; i < groups; i++) {
11826 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11828 read_vec_element(s, tcg_tmp, rn, i, grp_size);
11829 switch (grp_size) {
11830 case MO_16:
11831 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11832 break;
11833 case MO_32:
11834 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11835 break;
11836 case MO_64:
11837 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11838 break;
11839 default:
11840 g_assert_not_reached();
11842 write_vec_element(s, tcg_tmp, rd, i, grp_size);
11843 tcg_temp_free_i64(tcg_tmp);
11845 clear_vec_high(s, is_q, rd);
11846 } else {
11847 int revmask = (1 << grp_size) - 1;
11848 int esize = 8 << size;
11849 int elements = dsize / esize;
11850 TCGv_i64 tcg_rn = tcg_temp_new_i64();
11851 TCGv_i64 tcg_rd = tcg_const_i64(0);
11852 TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11854 for (i = 0; i < elements; i++) {
11855 int e_rev = (i & 0xf) ^ revmask;
11856 int off = e_rev * esize;
11857 read_vec_element(s, tcg_rn, rn, i, size);
11858 if (off >= 64) {
11859 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11860 tcg_rn, off - 64, esize);
11861 } else {
11862 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11865 write_vec_element(s, tcg_rd, rd, 0, MO_64);
11866 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11868 tcg_temp_free_i64(tcg_rd_hi);
11869 tcg_temp_free_i64(tcg_rd);
11870 tcg_temp_free_i64(tcg_rn);
11874 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11875 bool is_q, int size, int rn, int rd)
11877 /* Implement the pairwise operations from 2-misc:
11878 * SADDLP, UADDLP, SADALP, UADALP.
11879 * These all add pairs of elements in the input to produce a
11880 * double-width result element in the output (possibly accumulating).
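/*
 * For example SADDLP Vd.4S, Vn.8H adds adjacent pairs of 16 bit elements
 * to give 32 bit results, and SADALP does the same but accumulates into
 * the existing contents of Vd (the 'accum' case below).
 */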
11882 bool accum = (opcode == 0x6);
11883 int maxpass = is_q ? 2 : 1;
11884 int pass;
11885 TCGv_i64 tcg_res[2];
11887 if (size == 2) {
11888 /* 32 + 32 -> 64 op */
11889 MemOp memop = size + (u ? 0 : MO_SIGN);
11891 for (pass = 0; pass < maxpass; pass++) {
11892 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11893 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11895 tcg_res[pass] = tcg_temp_new_i64();
11897 read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11898 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11899 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11900 if (accum) {
11901 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11902 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11905 tcg_temp_free_i64(tcg_op1);
11906 tcg_temp_free_i64(tcg_op2);
11908 } else {
11909 for (pass = 0; pass < maxpass; pass++) {
11910 TCGv_i64 tcg_op = tcg_temp_new_i64();
11911 NeonGenOneOpFn *genfn;
11912 static NeonGenOneOpFn * const fns[2][2] = {
11913 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
11914 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
11917 genfn = fns[size][u];
11919 tcg_res[pass] = tcg_temp_new_i64();
11921 read_vec_element(s, tcg_op, rn, pass, MO_64);
11922 genfn(tcg_res[pass], tcg_op);
11924 if (accum) {
11925 read_vec_element(s, tcg_op, rd, pass, MO_64);
11926 if (size == 0) {
11927 gen_helper_neon_addl_u16(tcg_res[pass],
11928 tcg_res[pass], tcg_op);
11929 } else {
11930 gen_helper_neon_addl_u32(tcg_res[pass],
11931 tcg_res[pass], tcg_op);
11934 tcg_temp_free_i64(tcg_op);
11937 if (!is_q) {
11938 tcg_res[1] = tcg_const_i64(0);
11940 for (pass = 0; pass < 2; pass++) {
11941 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11942 tcg_temp_free_i64(tcg_res[pass]);
11946 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11948 /* Implement SHLL and SHLL2 */
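/*
 * SHLL widens each element and then shifts left by the source element
 * size (a fixed 8 << size), e.g. SHLL Vd.8H, Vn.8B, #8; SHLL2 takes its
 * input from the high half of Vn, selected by 'part' below.
 */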
11949 int pass;
11950 int part = is_q ? 2 : 0;
11951 TCGv_i64 tcg_res[2];
11953 for (pass = 0; pass < 2; pass++) {
11954 static NeonGenWidenFn * const widenfns[3] = {
11955 gen_helper_neon_widen_u8,
11956 gen_helper_neon_widen_u16,
11957 tcg_gen_extu_i32_i64,
11959 NeonGenWidenFn *widenfn = widenfns[size];
11960 TCGv_i32 tcg_op = tcg_temp_new_i32();
11962 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11963 tcg_res[pass] = tcg_temp_new_i64();
11964 widenfn(tcg_res[pass], tcg_op);
11965 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11967 tcg_temp_free_i32(tcg_op);
11970 for (pass = 0; pass < 2; pass++) {
11971 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11972 tcg_temp_free_i64(tcg_res[pass]);
11976 /* AdvSIMD two reg misc
11977 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
11978 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11979 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
11980 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
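/*
 * Opcodes 0xc..0xf and 0x16..0x1f are further decoded as floating point
 * ops: U and size[1] are folded into the opcode (as for the three-same FP
 * group) and size[0] then selects single vs double precision.
 */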
11982 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11984 int size = extract32(insn, 22, 2);
11985 int opcode = extract32(insn, 12, 5);
11986 bool u = extract32(insn, 29, 1);
11987 bool is_q = extract32(insn, 30, 1);
11988 int rn = extract32(insn, 5, 5);
11989 int rd = extract32(insn, 0, 5);
11990 bool need_fpstatus = false;
11991 bool need_rmode = false;
11992 int rmode = -1;
11993 TCGv_i32 tcg_rmode;
11994 TCGv_ptr tcg_fpstatus;
11996 switch (opcode) {
11997 case 0x0: /* REV64, REV32 */
11998 case 0x1: /* REV16 */
11999 handle_rev(s, opcode, u, is_q, size, rn, rd);
12000 return;
12001 case 0x5: /* CNT, NOT, RBIT */
12002 if (u && size == 0) {
12003 /* NOT */
12004 break;
12005 } else if (u && size == 1) {
12006 /* RBIT */
12007 break;
12008 } else if (!u && size == 0) {
12009 /* CNT */
12010 break;
12012 unallocated_encoding(s);
12013 return;
12014 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12015 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12016 if (size == 3) {
12017 unallocated_encoding(s);
12018 return;
12020 if (!fp_access_check(s)) {
12021 return;
12024 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12025 return;
12026 case 0x4: /* CLS, CLZ */
12027 if (size == 3) {
12028 unallocated_encoding(s);
12029 return;
12031 break;
12032 case 0x2: /* SADDLP, UADDLP */
12033 case 0x6: /* SADALP, UADALP */
12034 if (size == 3) {
12035 unallocated_encoding(s);
12036 return;
12038 if (!fp_access_check(s)) {
12039 return;
12041 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12042 return;
12043 case 0x13: /* SHLL, SHLL2 */
12044 if (u == 0 || size == 3) {
12045 unallocated_encoding(s);
12046 return;
12048 if (!fp_access_check(s)) {
12049 return;
12051 handle_shll(s, is_q, size, rn, rd);
12052 return;
12053 case 0xa: /* CMLT */
12054 if (u == 1) {
12055 unallocated_encoding(s);
12056 return;
12058 /* fall through */
12059 case 0x8: /* CMGT, CMGE */
12060 case 0x9: /* CMEQ, CMLE */
12061 case 0xb: /* ABS, NEG */
12062 if (size == 3 && !is_q) {
12063 unallocated_encoding(s);
12064 return;
12066 break;
12067 case 0x3: /* SUQADD, USQADD */
12068 if (size == 3 && !is_q) {
12069 unallocated_encoding(s);
12070 return;
12072 if (!fp_access_check(s)) {
12073 return;
12075 handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12076 return;
12077 case 0x7: /* SQABS, SQNEG */
12078 if (size == 3 && !is_q) {
12079 unallocated_encoding(s);
12080 return;
12082 break;
12083 case 0xc ... 0xf:
12084 case 0x16 ... 0x1f:
12086 /* Floating point: U, size[1] and opcode indicate operation;
12087 * size[0] indicates single or double precision.
12089 int is_double = extract32(size, 0, 1);
12090 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12091 size = is_double ? 3 : 2;
12092 switch (opcode) {
12093 case 0x2f: /* FABS */
12094 case 0x6f: /* FNEG */
12095 if (size == 3 && !is_q) {
12096 unallocated_encoding(s);
12097 return;
12099 break;
12100 case 0x1d: /* SCVTF */
12101 case 0x5d: /* UCVTF */
12103 bool is_signed = (opcode == 0x1d) ? true : false;
12104 int elements = is_double ? 2 : is_q ? 4 : 2;
12105 if (is_double && !is_q) {
12106 unallocated_encoding(s);
12107 return;
12109 if (!fp_access_check(s)) {
12110 return;
12112 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12113 return;
12115 case 0x2c: /* FCMGT (zero) */
12116 case 0x2d: /* FCMEQ (zero) */
12117 case 0x2e: /* FCMLT (zero) */
12118 case 0x6c: /* FCMGE (zero) */
12119 case 0x6d: /* FCMLE (zero) */
12120 if (size == 3 && !is_q) {
12121 unallocated_encoding(s);
12122 return;
12124 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12125 return;
12126 case 0x7f: /* FSQRT */
12127 if (size == 3 && !is_q) {
12128 unallocated_encoding(s);
12129 return;
12131 break;
12132 case 0x1a: /* FCVTNS */
12133 case 0x1b: /* FCVTMS */
12134 case 0x3a: /* FCVTPS */
12135 case 0x3b: /* FCVTZS */
12136 case 0x5a: /* FCVTNU */
12137 case 0x5b: /* FCVTMU */
12138 case 0x7a: /* FCVTPU */
12139 case 0x7b: /* FCVTZU */
12140 need_fpstatus = true;
12141 need_rmode = true;
12142 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12143 if (size == 3 && !is_q) {
12144 unallocated_encoding(s);
12145 return;
12147 break;
12148 case 0x5c: /* FCVTAU */
12149 case 0x1c: /* FCVTAS */
12150 need_fpstatus = true;
12151 need_rmode = true;
12152 rmode = FPROUNDING_TIEAWAY;
12153 if (size == 3 && !is_q) {
12154 unallocated_encoding(s);
12155 return;
12157 break;
12158 case 0x3c: /* URECPE */
12159 if (size == 3) {
12160 unallocated_encoding(s);
12161 return;
12163 /* fall through */
12164 case 0x3d: /* FRECPE */
12165 case 0x7d: /* FRSQRTE */
12166 if (size == 3 && !is_q) {
12167 unallocated_encoding(s);
12168 return;
12170 if (!fp_access_check(s)) {
12171 return;
12173 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12174 return;
12175 case 0x56: /* FCVTXN, FCVTXN2 */
12176 if (size == 2) {
12177 unallocated_encoding(s);
12178 return;
12180 /* fall through */
12181 case 0x16: /* FCVTN, FCVTN2 */
12182 /* handle_2misc_narrow does a 2*size -> size operation, but these
12183 * instructions encode the source size rather than dest size.
12185 if (!fp_access_check(s)) {
12186 return;
12188 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12189 return;
12190 case 0x17: /* FCVTL, FCVTL2 */
12191 if (!fp_access_check(s)) {
12192 return;
12194 handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12195 return;
12196 case 0x18: /* FRINTN */
12197 case 0x19: /* FRINTM */
12198 case 0x38: /* FRINTP */
12199 case 0x39: /* FRINTZ */
12200 need_rmode = true;
12201 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12202 /* fall through */
12203 case 0x59: /* FRINTX */
12204 case 0x79: /* FRINTI */
12205 need_fpstatus = true;
12206 if (size == 3 && !is_q) {
12207 unallocated_encoding(s);
12208 return;
12210 break;
12211 case 0x58: /* FRINTA */
12212 need_rmode = true;
12213 rmode = FPROUNDING_TIEAWAY;
12214 need_fpstatus = true;
12215 if (size == 3 && !is_q) {
12216 unallocated_encoding(s);
12217 return;
12219 break;
12220 case 0x7c: /* URSQRTE */
12221 if (size == 3) {
12222 unallocated_encoding(s);
12223 return;
12225 break;
12226 case 0x1e: /* FRINT32Z */
12227 case 0x1f: /* FRINT64Z */
12228 need_rmode = true;
12229 rmode = FPROUNDING_ZERO;
12230 /* fall through */
12231 case 0x5e: /* FRINT32X */
12232 case 0x5f: /* FRINT64X */
12233 need_fpstatus = true;
12234 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12235 unallocated_encoding(s);
12236 return;
12238 break;
12239 default:
12240 unallocated_encoding(s);
12241 return;
12243 break;
12245 default:
12246 unallocated_encoding(s);
12247 return;
12250 if (!fp_access_check(s)) {
12251 return;
12254 if (need_fpstatus || need_rmode) {
12255 tcg_fpstatus = get_fpstatus_ptr(false);
12256 } else {
12257 tcg_fpstatus = NULL;
12259 if (need_rmode) {
12260 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12261 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12262 } else {
12263 tcg_rmode = NULL;
12266 switch (opcode) {
12267 case 0x5:
12268 if (u && size == 0) { /* NOT */
12269 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12270 return;
12272 break;
12273 case 0x8: /* CMGT, CMGE */
12274 if (u) {
12275 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12276 } else {
12277 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12279 return;
12280 case 0x9: /* CMEQ, CMLE */
12281 if (u) {
12282 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12283 } else {
12284 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12286 return;
12287 case 0xa: /* CMLT */
12288 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12289 return;
12290 case 0xb:
12291 if (u) { /* ABS, NEG */
12292 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12293 } else {
12294 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12296 return;
12299 if (size == 3) {
12300 /* All 64-bit element operations can be shared with scalar 2misc */
12301 int pass;
12303 /* Coverity claims (size == 3 && !is_q) has been eliminated
12304 * from all paths leading to here.
12306 tcg_debug_assert(is_q);
12307 for (pass = 0; pass < 2; pass++) {
12308 TCGv_i64 tcg_op = tcg_temp_new_i64();
12309 TCGv_i64 tcg_res = tcg_temp_new_i64();
12311 read_vec_element(s, tcg_op, rn, pass, MO_64);
12313 handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12314 tcg_rmode, tcg_fpstatus);
12316 write_vec_element(s, tcg_res, rd, pass, MO_64);
12318 tcg_temp_free_i64(tcg_res);
12319 tcg_temp_free_i64(tcg_op);
12321 } else {
12322 int pass;
12324 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12325 TCGv_i32 tcg_op = tcg_temp_new_i32();
12326 TCGv_i32 tcg_res = tcg_temp_new_i32();
12328 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12330 if (size == 2) {
12331 /* Special cases for 32 bit elements */
12332 switch (opcode) {
12333 case 0x4: /* CLS */
12334 if (u) {
12335 tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12336 } else {
12337 tcg_gen_clrsb_i32(tcg_res, tcg_op);
12339 break;
12340 case 0x7: /* SQABS, SQNEG */
12341 if (u) {
12342 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12343 } else {
12344 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12346 break;
12347 case 0x2f: /* FABS */
12348 gen_helper_vfp_abss(tcg_res, tcg_op);
12349 break;
12350 case 0x6f: /* FNEG */
12351 gen_helper_vfp_negs(tcg_res, tcg_op);
12352 break;
12353 case 0x7f: /* FSQRT */
12354 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12355 break;
12356 case 0x1a: /* FCVTNS */
12357 case 0x1b: /* FCVTMS */
12358 case 0x1c: /* FCVTAS */
12359 case 0x3a: /* FCVTPS */
12360 case 0x3b: /* FCVTZS */
12362 TCGv_i32 tcg_shift = tcg_const_i32(0);
12363 gen_helper_vfp_tosls(tcg_res, tcg_op,
12364 tcg_shift, tcg_fpstatus);
12365 tcg_temp_free_i32(tcg_shift);
12366 break;
12368 case 0x5a: /* FCVTNU */
12369 case 0x5b: /* FCVTMU */
12370 case 0x5c: /* FCVTAU */
12371 case 0x7a: /* FCVTPU */
12372 case 0x7b: /* FCVTZU */
12374 TCGv_i32 tcg_shift = tcg_const_i32(0);
12375 gen_helper_vfp_touls(tcg_res, tcg_op,
12376 tcg_shift, tcg_fpstatus);
12377 tcg_temp_free_i32(tcg_shift);
12378 break;
12380 case 0x18: /* FRINTN */
12381 case 0x19: /* FRINTM */
12382 case 0x38: /* FRINTP */
12383 case 0x39: /* FRINTZ */
12384 case 0x58: /* FRINTA */
12385 case 0x79: /* FRINTI */
12386 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12387 break;
12388 case 0x59: /* FRINTX */
12389 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12390 break;
12391 case 0x7c: /* URSQRTE */
12392 gen_helper_rsqrte_u32(tcg_res, tcg_op);
12393 break;
12394 case 0x1e: /* FRINT32Z */
12395 case 0x5e: /* FRINT32X */
12396 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12397 break;
12398 case 0x1f: /* FRINT64Z */
12399 case 0x5f: /* FRINT64X */
12400 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12401 break;
12402 default:
12403 g_assert_not_reached();
12405 } else {
12406 /* Use helpers for 8 and 16 bit elements */
12407 switch (opcode) {
12408 case 0x5: /* CNT, RBIT */
12409 /* For these two insns size is part of the opcode specifier
12410 * (handled earlier); they always operate on byte elements.
12412 if (u) {
12413 gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12414 } else {
12415 gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12417 break;
12418 case 0x7: /* SQABS, SQNEG */
12420 NeonGenOneOpEnvFn *genfn;
12421 static NeonGenOneOpEnvFn * const fns[2][2] = {
12422 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12423 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12425 genfn = fns[size][u];
12426 genfn(tcg_res, cpu_env, tcg_op);
12427 break;
12429 case 0x4: /* CLS, CLZ */
12430 if (u) {
12431 if (size == 0) {
12432 gen_helper_neon_clz_u8(tcg_res, tcg_op);
12433 } else {
12434 gen_helper_neon_clz_u16(tcg_res, tcg_op);
12436 } else {
12437 if (size == 0) {
12438 gen_helper_neon_cls_s8(tcg_res, tcg_op);
12439 } else {
12440 gen_helper_neon_cls_s16(tcg_res, tcg_op);
12443 break;
12444 default:
12445 g_assert_not_reached();
12449 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12451 tcg_temp_free_i32(tcg_res);
12452 tcg_temp_free_i32(tcg_op);
12455 clear_vec_high(s, is_q, rd);
12457 if (need_rmode) {
12458 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12459 tcg_temp_free_i32(tcg_rmode);
12461 if (need_fpstatus) {
12462 tcg_temp_free_ptr(tcg_fpstatus);
12466 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12468 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
12469 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12470 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
12471 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12472 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12473 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12475 * This actually covers two groups where scalar access is governed by
12476 * bit 28. A bunch of the instructions (float to integral) only exist
12477 * in the vector form and are unallocated for the scalar decode. Also
12478 * in the scalar decode Q is always 1.
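/*
 * Note: the decode below folds the 'a' and 'u' bits into fpop = u:a:opcode
 * so that a single switch covers every variant; e.g. FNEG (u=1, a=1,
 * opcode=0xf) becomes fpop 0x6f.
 */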
12480 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12482 int fpop, opcode, a, u;
12483 int rn, rd;
12484 bool is_q;
12485 bool is_scalar;
12486 bool only_in_vector = false;
12488 int pass;
12489 TCGv_i32 tcg_rmode = NULL;
12490 TCGv_ptr tcg_fpstatus = NULL;
12491 bool need_rmode = false;
12492 bool need_fpst = true;
12493 int rmode;
12495 if (!dc_isar_feature(aa64_fp16, s)) {
12496 unallocated_encoding(s);
12497 return;
12500 rd = extract32(insn, 0, 5);
12501 rn = extract32(insn, 5, 5);
12503 a = extract32(insn, 23, 1);
12504 u = extract32(insn, 29, 1);
12505 is_scalar = extract32(insn, 28, 1);
12506 is_q = extract32(insn, 30, 1);
12508 opcode = extract32(insn, 12, 5);
12509 fpop = deposit32(opcode, 5, 1, a);
12510 fpop = deposit32(fpop, 6, 1, u);
12515 switch (fpop) {
12516 case 0x1d: /* SCVTF */
12517 case 0x5d: /* UCVTF */
12519 int elements;
12521 if (is_scalar) {
12522 elements = 1;
12523 } else {
12524 elements = (is_q ? 8 : 4);
12527 if (!fp_access_check(s)) {
12528 return;
12530 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12531 return;
12533 break;
12534 case 0x2c: /* FCMGT (zero) */
12535 case 0x2d: /* FCMEQ (zero) */
12536 case 0x2e: /* FCMLT (zero) */
12537 case 0x6c: /* FCMGE (zero) */
12538 case 0x6d: /* FCMLE (zero) */
12539 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12540 return;
12541 case 0x3d: /* FRECPE */
12542 case 0x3f: /* FRECPX */
12543 break;
12544 case 0x18: /* FRINTN */
12545 need_rmode = true;
12546 only_in_vector = true;
12547 rmode = FPROUNDING_TIEEVEN;
12548 break;
12549 case 0x19: /* FRINTM */
12550 need_rmode = true;
12551 only_in_vector = true;
12552 rmode = FPROUNDING_NEGINF;
12553 break;
12554 case 0x38: /* FRINTP */
12555 need_rmode = true;
12556 only_in_vector = true;
12557 rmode = FPROUNDING_POSINF;
12558 break;
12559 case 0x39: /* FRINTZ */
12560 need_rmode = true;
12561 only_in_vector = true;
12562 rmode = FPROUNDING_ZERO;
12563 break;
12564 case 0x58: /* FRINTA */
12565 need_rmode = true;
12566 only_in_vector = true;
12567 rmode = FPROUNDING_TIEAWAY;
12568 break;
12569 case 0x59: /* FRINTX */
12570 case 0x79: /* FRINTI */
12571 only_in_vector = true;
12572 /* current rounding mode */
12573 break;
12574 case 0x1a: /* FCVTNS */
12575 need_rmode = true;
12576 rmode = FPROUNDING_TIEEVEN;
12577 break;
12578 case 0x1b: /* FCVTMS */
12579 need_rmode = true;
12580 rmode = FPROUNDING_NEGINF;
12581 break;
12582 case 0x1c: /* FCVTAS */
12583 need_rmode = true;
12584 rmode = FPROUNDING_TIEAWAY;
12585 break;
12586 case 0x3a: /* FCVTPS */
12587 need_rmode = true;
12588 rmode = FPROUNDING_POSINF;
12589 break;
12590 case 0x3b: /* FCVTZS */
12591 need_rmode = true;
12592 rmode = FPROUNDING_ZERO;
12593 break;
12594 case 0x5a: /* FCVTNU */
12595 need_rmode = true;
12596 rmode = FPROUNDING_TIEEVEN;
12597 break;
12598 case 0x5b: /* FCVTMU */
12599 need_rmode = true;
12600 rmode = FPROUNDING_NEGINF;
12601 break;
12602 case 0x5c: /* FCVTAU */
12603 need_rmode = true;
12604 rmode = FPROUNDING_TIEAWAY;
12605 break;
12606 case 0x7a: /* FCVTPU */
12607 need_rmode = true;
12608 rmode = FPROUNDING_POSINF;
12609 break;
12610 case 0x7b: /* FCVTZU */
12611 need_rmode = true;
12612 rmode = FPROUNDING_ZERO;
12613 break;
12614 case 0x2f: /* FABS */
12615 case 0x6f: /* FNEG */
12616 need_fpst = false;
12617 break;
12618 case 0x7d: /* FRSQRTE */
12619 case 0x7f: /* FSQRT (vector) */
12620 break;
12621 default:
12622 fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
12623 g_assert_not_reached();
12627 /* Check additional constraints for the scalar encoding */
12628 if (is_scalar) {
12629 if (!is_q) {
12630 unallocated_encoding(s);
12631 return;
12633 /* FRINTxx is only in the vector form */
12634 if (only_in_vector) {
12635 unallocated_encoding(s);
12636 return;
12640 if (!fp_access_check(s)) {
12641 return;
12644 if (need_rmode || need_fpst) {
12645 tcg_fpstatus = get_fpstatus_ptr(true);
12648 if (need_rmode) {
12649 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12650 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
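/*
 * gen_helper_set_rmode() installs the new rounding mode and returns the
 * previous one in tcg_rmode; the mirror call at the end of the function
 * uses that value to restore it.
 */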
12653 if (is_scalar) {
12654 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12655 TCGv_i32 tcg_res = tcg_temp_new_i32();
12657 switch (fpop) {
12658 case 0x1a: /* FCVTNS */
12659 case 0x1b: /* FCVTMS */
12660 case 0x1c: /* FCVTAS */
12661 case 0x3a: /* FCVTPS */
12662 case 0x3b: /* FCVTZS */
12663 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12664 break;
12665 case 0x3d: /* FRECPE */
12666 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12667 break;
12668 case 0x3f: /* FRECPX */
12669 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12670 break;
12671 case 0x5a: /* FCVTNU */
12672 case 0x5b: /* FCVTMU */
12673 case 0x5c: /* FCVTAU */
12674 case 0x7a: /* FCVTPU */
12675 case 0x7b: /* FCVTZU */
12676 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12677 break;
12678 case 0x6f: /* FNEG */
12679 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12680 break;
12681 case 0x7d: /* FRSQRTE */
12682 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12683 break;
12684 default:
12685 g_assert_not_reached();
12688 /* limit any sign extension going on */
12689 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12690 write_fp_sreg(s, rd, tcg_res);
12692 tcg_temp_free_i32(tcg_res);
12693 tcg_temp_free_i32(tcg_op);
12694 } else {
12695 for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12696 TCGv_i32 tcg_op = tcg_temp_new_i32();
12697 TCGv_i32 tcg_res = tcg_temp_new_i32();
12699 read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12701 switch (fpop) {
12702 case 0x1a: /* FCVTNS */
12703 case 0x1b: /* FCVTMS */
12704 case 0x1c: /* FCVTAS */
12705 case 0x3a: /* FCVTPS */
12706 case 0x3b: /* FCVTZS */
12707 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12708 break;
12709 case 0x3d: /* FRECPE */
12710 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12711 break;
12712 case 0x5a: /* FCVTNU */
12713 case 0x5b: /* FCVTMU */
12714 case 0x5c: /* FCVTAU */
12715 case 0x7a: /* FCVTPU */
12716 case 0x7b: /* FCVTZU */
12717 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12718 break;
12719 case 0x18: /* FRINTN */
12720 case 0x19: /* FRINTM */
12721 case 0x38: /* FRINTP */
12722 case 0x39: /* FRINTZ */
12723 case 0x58: /* FRINTA */
12724 case 0x79: /* FRINTI */
12725 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12726 break;
12727 case 0x59: /* FRINTX */
12728 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12729 break;
12730 case 0x2f: /* FABS */
12731 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12732 break;
12733 case 0x6f: /* FNEG */
12734 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12735 break;
12736 case 0x7d: /* FRSQRTE */
12737 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12738 break;
12739 case 0x7f: /* FSQRT */
12740 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12741 break;
12742 default:
12743 g_assert_not_reached();
12746 write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12748 tcg_temp_free_i32(tcg_res);
12749 tcg_temp_free_i32(tcg_op);
12752 clear_vec_high(s, is_q, rd);
12755 if (tcg_rmode) {
12756 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12757 tcg_temp_free_i32(tcg_rmode);
12760 if (tcg_fpstatus) {
12761 tcg_temp_free_ptr(tcg_fpstatus);
12765 /* AdvSIMD scalar x indexed element
12766 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12767 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12768 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12769 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12770 * AdvSIMD vector x indexed element
12771 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12772 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12773 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12774 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12776 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12778 /* This encoding has two kinds of instruction:
12779 * normal, where we perform elt x idxelt => elt for each
12780 * element in the vector
12781 * long, where we perform elt x idxelt and generate a result of
12782 * double the width of the input element
12783 * The long ops have a 'part' specifier (i.e. they come in INSN, INSN2 pairs).
12785 bool is_scalar = extract32(insn, 28, 1);
12786 bool is_q = extract32(insn, 30, 1);
12787 bool u = extract32(insn, 29, 1);
12788 int size = extract32(insn, 22, 2);
12789 int l = extract32(insn, 21, 1);
12790 int m = extract32(insn, 20, 1);
12791 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12792 int rm = extract32(insn, 16, 4);
12793 int opcode = extract32(insn, 12, 4);
12794 int h = extract32(insn, 11, 1);
12795 int rn = extract32(insn, 5, 5);
12796 int rd = extract32(insn, 0, 5);
12797 bool is_long = false;
12798 int is_fp = 0;
12799 bool is_fp16 = false;
12800 int index;
12801 TCGv_ptr fpst;
12803 switch (16 * u + opcode) {
12804 case 0x08: /* MUL */
12805 case 0x10: /* MLA */
12806 case 0x14: /* MLS */
12807 if (is_scalar) {
12808 unallocated_encoding(s);
12809 return;
12811 break;
12812 case 0x02: /* SMLAL, SMLAL2 */
12813 case 0x12: /* UMLAL, UMLAL2 */
12814 case 0x06: /* SMLSL, SMLSL2 */
12815 case 0x16: /* UMLSL, UMLSL2 */
12816 case 0x0a: /* SMULL, SMULL2 */
12817 case 0x1a: /* UMULL, UMULL2 */
12818 if (is_scalar) {
12819 unallocated_encoding(s);
12820 return;
12822 is_long = true;
12823 break;
12824 case 0x03: /* SQDMLAL, SQDMLAL2 */
12825 case 0x07: /* SQDMLSL, SQDMLSL2 */
12826 case 0x0b: /* SQDMULL, SQDMULL2 */
12827 is_long = true;
12828 break;
12829 case 0x0c: /* SQDMULH */
12830 case 0x0d: /* SQRDMULH */
12831 break;
12832 case 0x01: /* FMLA */
12833 case 0x05: /* FMLS */
12834 case 0x09: /* FMUL */
12835 case 0x19: /* FMULX */
12836 is_fp = 1;
12837 break;
12838 case 0x1d: /* SQRDMLAH */
12839 case 0x1f: /* SQRDMLSH */
12840 if (!dc_isar_feature(aa64_rdm, s)) {
12841 unallocated_encoding(s);
12842 return;
12844 break;
12845 case 0x0e: /* SDOT */
12846 case 0x1e: /* UDOT */
12847 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12848 unallocated_encoding(s);
12849 return;
12851 break;
12852 case 0x11: /* FCMLA #0 */
12853 case 0x13: /* FCMLA #90 */
12854 case 0x15: /* FCMLA #180 */
12855 case 0x17: /* FCMLA #270 */
12856 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12857 unallocated_encoding(s);
12858 return;
12860 is_fp = 2;
12861 break;
12862 case 0x00: /* FMLAL */
12863 case 0x04: /* FMLSL */
12864 case 0x18: /* FMLAL2 */
12865 case 0x1c: /* FMLSL2 */
12866 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12867 unallocated_encoding(s);
12868 return;
12870 size = MO_16;
12871 /* is_fp, but we pass cpu_env not fp_status. */
12872 break;
12873 default:
12874 unallocated_encoding(s);
12875 return;
12878 switch (is_fp) {
12879 case 1: /* normal fp */
12880 /* convert insn encoded size to MemOp size */
12881 switch (size) {
12882 case 0: /* half-precision */
12883 size = MO_16;
12884 is_fp16 = true;
12885 break;
12886 case MO_32: /* single precision */
12887 case MO_64: /* double precision */
12888 break;
12889 default:
12890 unallocated_encoding(s);
12891 return;
12893 break;
12895 case 2: /* complex fp */
12896 /* Each indexable element is a complex pair. */
12897 size += 1;
12898 switch (size) {
12899 case MO_32:
12900 if (h && !is_q) {
12901 unallocated_encoding(s);
12902 return;
12904 is_fp16 = true;
12905 break;
12906 case MO_64:
12907 break;
12908 default:
12909 unallocated_encoding(s);
12910 return;
12912 break;
12914 default: /* integer */
12915 switch (size) {
12916 case MO_8:
12917 case MO_64:
12918 unallocated_encoding(s);
12919 return;
12921 break;
12923 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12924 unallocated_encoding(s);
12925 return;
12928 /* Given MemOp size, adjust register and indexing. */
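/*
 * MO_16: index is H:L:M (0..7) and Rm stays 4 bits wide; MO_32: index is
 * H:L and M extends Rm to 5 bits; MO_64: index is just H, M extends Rm,
 * and L must be zero.
 */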
12929 switch (size) {
12930 case MO_16:
12931 index = h << 2 | l << 1 | m;
12932 break;
12933 case MO_32:
12934 index = h << 1 | l;
12935 rm |= m << 4;
12936 break;
12937 case MO_64:
12938 if (l || !is_q) {
12939 unallocated_encoding(s);
12940 return;
12942 index = h;
12943 rm |= m << 4;
12944 break;
12945 default:
12946 g_assert_not_reached();
12949 if (!fp_access_check(s)) {
12950 return;
12953 if (is_fp) {
12954 fpst = get_fpstatus_ptr(is_fp16);
12955 } else {
12956 fpst = NULL;
12959 switch (16 * u + opcode) {
12960 case 0x0e: /* SDOT */
12961 case 0x1e: /* UDOT */
12962 gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
12963 u ? gen_helper_gvec_udot_idx_b
12964 : gen_helper_gvec_sdot_idx_b);
12965 return;
12966 case 0x11: /* FCMLA #0 */
12967 case 0x13: /* FCMLA #90 */
12968 case 0x15: /* FCMLA #180 */
12969 case 0x17: /* FCMLA #270 */
12971 int rot = extract32(insn, 13, 2);
12972 int data = (index << 2) | rot;
12973 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12974 vec_full_reg_offset(s, rn),
12975 vec_full_reg_offset(s, rm), fpst,
12976 is_q ? 16 : 8, vec_full_reg_size(s), data,
12977 size == MO_64
12978 ? gen_helper_gvec_fcmlas_idx
12979 : gen_helper_gvec_fcmlah_idx);
12980 tcg_temp_free_ptr(fpst);
12982 return;
12984 case 0x00: /* FMLAL */
12985 case 0x04: /* FMLSL */
12986 case 0x18: /* FMLAL2 */
12987 case 0x1c: /* FMLSL2 */
12989 int is_s = extract32(opcode, 2, 1);
12990 int is_2 = u;
12991 int data = (index << 2) | (is_2 << 1) | is_s;
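/*
 * data packs the element index together with is_2 (the FMLAL2/FMLSL2
 * forms) and is_s (the subtracting FMLSL forms) for the helper to unpack.
 */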
12992 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12993 vec_full_reg_offset(s, rn),
12994 vec_full_reg_offset(s, rm), cpu_env,
12995 is_q ? 16 : 8, vec_full_reg_size(s),
12996 data, gen_helper_gvec_fmlal_idx_a64);
12998 return;
13001 if (size == 3) {
13002 TCGv_i64 tcg_idx = tcg_temp_new_i64();
13003 int pass;
13005 assert(is_fp && is_q && !is_long);
13007 read_vec_element(s, tcg_idx, rm, index, MO_64);
13009 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13010 TCGv_i64 tcg_op = tcg_temp_new_i64();
13011 TCGv_i64 tcg_res = tcg_temp_new_i64();
13013 read_vec_element(s, tcg_op, rn, pass, MO_64);
13015 switch (16 * u + opcode) {
13016 case 0x05: /* FMLS */
13017 /* As usual for ARM, separate negation for fused multiply-add */
13018 gen_helper_vfp_negd(tcg_op, tcg_op);
13019 /* fall through */
13020 case 0x01: /* FMLA */
13021 read_vec_element(s, tcg_res, rd, pass, MO_64);
13022 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13023 break;
13024 case 0x09: /* FMUL */
13025 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13026 break;
13027 case 0x19: /* FMULX */
13028 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13029 break;
13030 default:
13031 g_assert_not_reached();
13034 write_vec_element(s, tcg_res, rd, pass, MO_64);
13035 tcg_temp_free_i64(tcg_op);
13036 tcg_temp_free_i64(tcg_res);
13039 tcg_temp_free_i64(tcg_idx);
13040 clear_vec_high(s, !is_scalar, rd);
13041 } else if (!is_long) {
13042 /* 32 bit floating point, or 16 or 32 bit integer.
13043 * For the 16 bit scalar case we use the usual Neon helpers and
13044 * rely on the fact that 0 op 0 == 0 with no side effects.
13046 TCGv_i32 tcg_idx = tcg_temp_new_i32();
13047 int pass, maxpasses;
13049 if (is_scalar) {
13050 maxpasses = 1;
13051 } else {
13052 maxpasses = is_q ? 4 : 2;
13055 read_vec_element_i32(s, tcg_idx, rm, index, size);
13057 if (size == 1 && !is_scalar) {
13058 /* The simplest way to handle the 16x16 indexed ops is to duplicate
13059 * the index into both halves of the 32 bit tcg_idx and then use
13060 * the usual Neon helpers.
13062 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
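/*
 * Both 16-bit lanes of tcg_idx now hold the same scalar element, so each
 * _u16 helper call below operates on two vector lanes per pass.
 */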
13065 for (pass = 0; pass < maxpasses; pass++) {
13066 TCGv_i32 tcg_op = tcg_temp_new_i32();
13067 TCGv_i32 tcg_res = tcg_temp_new_i32();
13069 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13071 switch (16 * u + opcode) {
13072 case 0x08: /* MUL */
13073 case 0x10: /* MLA */
13074 case 0x14: /* MLS */
13076 static NeonGenTwoOpFn * const fns[2][2] = {
13077 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13078 { tcg_gen_add_i32, tcg_gen_sub_i32 },
13080 NeonGenTwoOpFn *genfn;
13081 bool is_sub = opcode == 0x4;
13083 if (size == 1) {
13084 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13085 } else {
13086 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13088 if (opcode == 0x8) {
13089 break;
13091 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13092 genfn = fns[size - 1][is_sub];
13093 genfn(tcg_res, tcg_op, tcg_res);
13094 break;
13096 case 0x05: /* FMLS */
13097 case 0x01: /* FMLA */
13098 read_vec_element_i32(s, tcg_res, rd, pass,
13099 is_scalar ? size : MO_32);
13100 switch (size) {
13101 case 1:
13102 if (opcode == 0x5) {
13103 /* As usual for ARM, separate negation for fused
13104 * multiply-add */
13105 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13107 if (is_scalar) {
13108 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13109 tcg_res, fpst);
13110 } else {
13111 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13112 tcg_res, fpst);
13114 break;
13115 case 2:
13116 if (opcode == 0x5) {
13117 /* As usual for ARM, separate negation for
13118 * fused multiply-add */
13119 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13121 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13122 tcg_res, fpst);
13123 break;
13124 default:
13125 g_assert_not_reached();
13127 break;
13128 case 0x09: /* FMUL */
13129 switch (size) {
13130 case 1:
13131 if (is_scalar) {
13132 gen_helper_advsimd_mulh(tcg_res, tcg_op,
13133 tcg_idx, fpst);
13134 } else {
13135 gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13136 tcg_idx, fpst);
13138 break;
13139 case 2:
13140 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13141 break;
13142 default:
13143 g_assert_not_reached();
13145 break;
13146 case 0x19: /* FMULX */
13147 switch (size) {
13148 case 1:
13149 if (is_scalar) {
13150 gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13151 tcg_idx, fpst);
13152 } else {
13153 gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13154 tcg_idx, fpst);
13156 break;
13157 case 2:
13158 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13159 break;
13160 default:
13161 g_assert_not_reached();
13163 break;
13164 case 0x0c: /* SQDMULH */
13165 if (size == 1) {
13166 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13167 tcg_op, tcg_idx);
13168 } else {
13169 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13170 tcg_op, tcg_idx);
13172 break;
13173 case 0x0d: /* SQRDMULH */
13174 if (size == 1) {
13175 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13176 tcg_op, tcg_idx);
13177 } else {
13178 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13179 tcg_op, tcg_idx);
13181 break;
13182 case 0x1d: /* SQRDMLAH */
13183 read_vec_element_i32(s, tcg_res, rd, pass,
13184 is_scalar ? size : MO_32);
13185 if (size == 1) {
13186 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13187 tcg_op, tcg_idx, tcg_res);
13188 } else {
13189 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13190 tcg_op, tcg_idx, tcg_res);
13192 break;
13193 case 0x1f: /* SQRDMLSH */
13194 read_vec_element_i32(s, tcg_res, rd, pass,
13195 is_scalar ? size : MO_32);
13196 if (size == 1) {
13197 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13198 tcg_op, tcg_idx, tcg_res);
13199 } else {
13200 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13201 tcg_op, tcg_idx, tcg_res);
13203 break;
13204 default:
13205 g_assert_not_reached();
13208 if (is_scalar) {
13209 write_fp_sreg(s, rd, tcg_res);
13210 } else {
13211 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13214 tcg_temp_free_i32(tcg_op);
13215 tcg_temp_free_i32(tcg_res);
13218 tcg_temp_free_i32(tcg_idx);
13219 clear_vec_high(s, is_q, rd);
13220 } else {
13221 /* long ops: 16x16->32 or 32x32->64 */
13222 TCGv_i64 tcg_res[2];
13223 int pass;
13224 bool satop = extract32(opcode, 0, 1);
13225 MemOp memop = MO_32;
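/*
 * The saturating ops (SQDMLAL/SQDMLSL/SQDMULL) are always signed; for the
 * plain widening ops the signedness follows the U bit.
 */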
13227 if (satop || !u) {
13228 memop |= MO_SIGN;
13231 if (size == 2) {
13232 TCGv_i64 tcg_idx = tcg_temp_new_i64();
13234 read_vec_element(s, tcg_idx, rm, index, memop);
13236 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13237 TCGv_i64 tcg_op = tcg_temp_new_i64();
13238 TCGv_i64 tcg_passres;
13239 int passelt;
13241 if (is_scalar) {
13242 passelt = 0;
13243 } else {
13244 passelt = pass + (is_q * 2);
13247 read_vec_element(s, tcg_op, rn, passelt, memop);
13249 tcg_res[pass] = tcg_temp_new_i64();
13251 if (opcode == 0xa || opcode == 0xb) {
13252 /* Non-accumulating ops */
13253 tcg_passres = tcg_res[pass];
13254 } else {
13255 tcg_passres = tcg_temp_new_i64();
13258 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13259 tcg_temp_free_i64(tcg_op);
13261 if (satop) {
13262 /* saturating, doubling */
13263 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13264 tcg_passres, tcg_passres);
13267 if (opcode == 0xa || opcode == 0xb) {
13268 continue;
13271 /* Accumulating op: handle accumulate step */
13272 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13274 switch (opcode) {
13275 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13276 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13277 break;
13278 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13279 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13280 break;
13281 case 0x7: /* SQDMLSL, SQDMLSL2 */
13282 tcg_gen_neg_i64(tcg_passres, tcg_passres);
13283 /* fall through */
13284 case 0x3: /* SQDMLAL, SQDMLAL2 */
13285 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13286 tcg_res[pass],
13287 tcg_passres);
13288 break;
13289 default:
13290 g_assert_not_reached();
13292 tcg_temp_free_i64(tcg_passres);
13294 tcg_temp_free_i64(tcg_idx);
13296 clear_vec_high(s, !is_scalar, rd);
13297 } else {
13298 TCGv_i32 tcg_idx = tcg_temp_new_i32();
13300 assert(size == 1);
13301 read_vec_element_i32(s, tcg_idx, rm, index, size);
13303 if (!is_scalar) {
13304 /* The simplest way to handle the 16x16 indexed ops is to
13305 * duplicate the index into both halves of the 32 bit tcg_idx
13306 * and then use the usual Neon helpers.
13308 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13311 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13312 TCGv_i32 tcg_op = tcg_temp_new_i32();
13313 TCGv_i64 tcg_passres;
13315 if (is_scalar) {
13316 read_vec_element_i32(s, tcg_op, rn, pass, size);
13317 } else {
13318 read_vec_element_i32(s, tcg_op, rn,
13319 pass + (is_q * 2), MO_32);
13322 tcg_res[pass] = tcg_temp_new_i64();
13324 if (opcode == 0xa || opcode == 0xb) {
13325 /* Non-accumulating ops */
13326 tcg_passres = tcg_res[pass];
13327 } else {
13328 tcg_passres = tcg_temp_new_i64();
13331 if (memop & MO_SIGN) {
13332 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13333 } else {
13334 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13336 if (satop) {
13337 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13338 tcg_passres, tcg_passres);
13340 tcg_temp_free_i32(tcg_op);
13342 if (opcode == 0xa || opcode == 0xb) {
13343 continue;
13346 /* Accumulating op: handle accumulate step */
13347 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13349 switch (opcode) {
13350 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13351 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13352 tcg_passres);
13353 break;
13354 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13355 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13356 tcg_passres);
13357 break;
13358 case 0x7: /* SQDMLSL, SQDMLSL2 */
13359 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13360 /* fall through */
13361 case 0x3: /* SQDMLAL, SQDMLAL2 */
13362 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13363 tcg_res[pass],
13364 tcg_passres);
13365 break;
13366 default:
13367 g_assert_not_reached();
13369 tcg_temp_free_i64(tcg_passres);
13371 tcg_temp_free_i32(tcg_idx);
13373 if (is_scalar) {
13374 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13378 if (is_scalar) {
13379 tcg_res[1] = tcg_const_i64(0);
13382 for (pass = 0; pass < 2; pass++) {
13383 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13384 tcg_temp_free_i64(tcg_res[pass]);
13388 if (fpst) {
13389 tcg_temp_free_ptr(fpst);
13393 /* Crypto AES
13394 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13395 * +-----------------+------+-----------+--------+-----+------+------+
13396 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13397 * +-----------------+------+-----------+--------+-----+------+------+
13399 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13401 int size = extract32(insn, 22, 2);
13402 int opcode = extract32(insn, 12, 5);
13403 int rn = extract32(insn, 5, 5);
13404 int rd = extract32(insn, 0, 5);
13405 int decrypt;
13406 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13407 TCGv_i32 tcg_decrypt;
13408 CryptoThreeOpIntFn *genfn;
13410 if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13411 unallocated_encoding(s);
13412 return;
13415 switch (opcode) {
13416 case 0x4: /* AESE */
13417 decrypt = 0;
13418 genfn = gen_helper_crypto_aese;
13419 break;
13420 case 0x6: /* AESMC */
13421 decrypt = 0;
13422 genfn = gen_helper_crypto_aesmc;
13423 break;
13424 case 0x5: /* AESD */
13425 decrypt = 1;
13426 genfn = gen_helper_crypto_aese;
13427 break;
13428 case 0x7: /* AESIMC */
13429 decrypt = 1;
13430 genfn = gen_helper_crypto_aesmc;
13431 break;
13432 default:
13433 unallocated_encoding(s);
13434 return;
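/*
 * Note that AESD and AESIMC reuse the AESE/AESMC helpers; tcg_decrypt
 * below tells the helper to apply the inverse transformation.
 */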
13437 if (!fp_access_check(s)) {
13438 return;
13441 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13442 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13443 tcg_decrypt = tcg_const_i32(decrypt);
13445 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
13447 tcg_temp_free_ptr(tcg_rd_ptr);
13448 tcg_temp_free_ptr(tcg_rn_ptr);
13449 tcg_temp_free_i32(tcg_decrypt);
13452 /* Crypto three-reg SHA
13453 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
13454 * +-----------------+------+---+------+---+--------+-----+------+------+
13455 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
13456 * +-----------------+------+---+------+---+--------+-----+------+------+
13458 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13460 int size = extract32(insn, 22, 2);
13461 int opcode = extract32(insn, 12, 3);
13462 int rm = extract32(insn, 16, 5);
13463 int rn = extract32(insn, 5, 5);
13464 int rd = extract32(insn, 0, 5);
13465 CryptoThreeOpFn *genfn;
13466 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13467 bool feature;
13469 if (size != 0) {
13470 unallocated_encoding(s);
13471 return;
13474 switch (opcode) {
13475 case 0: /* SHA1C */
13476 case 1: /* SHA1P */
13477 case 2: /* SHA1M */
13478 case 3: /* SHA1SU0 */
13479 genfn = NULL;
13480 feature = dc_isar_feature(aa64_sha1, s);
13481 break;
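/*
 * The four SHA1 ops share gen_helper_crypto_sha1_3reg, which takes the
 * opcode at run time (see the genfn == NULL path below), so no dedicated
 * helper is selected here.
 */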
13482 case 4: /* SHA256H */
13483 genfn = gen_helper_crypto_sha256h;
13484 feature = dc_isar_feature(aa64_sha256, s);
13485 break;
13486 case 5: /* SHA256H2 */
13487 genfn = gen_helper_crypto_sha256h2;
13488 feature = dc_isar_feature(aa64_sha256, s);
13489 break;
13490 case 6: /* SHA256SU1 */
13491 genfn = gen_helper_crypto_sha256su1;
13492 feature = dc_isar_feature(aa64_sha256, s);
13493 break;
13494 default:
13495 unallocated_encoding(s);
13496 return;
13499 if (!feature) {
13500 unallocated_encoding(s);
13501 return;
13504 if (!fp_access_check(s)) {
13505 return;
13508 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13509 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13510 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13512 if (genfn) {
13513 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13514 } else {
13515 TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
13517 gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
13518 tcg_rm_ptr, tcg_opcode);
13519 tcg_temp_free_i32(tcg_opcode);
13522 tcg_temp_free_ptr(tcg_rd_ptr);
13523 tcg_temp_free_ptr(tcg_rn_ptr);
13524 tcg_temp_free_ptr(tcg_rm_ptr);
13527 /* Crypto two-reg SHA
13528 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13529 * +-----------------+------+-----------+--------+-----+------+------+
13530 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13531 * +-----------------+------+-----------+--------+-----+------+------+
13533 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13535 int size = extract32(insn, 22, 2);
13536 int opcode = extract32(insn, 12, 5);
13537 int rn = extract32(insn, 5, 5);
13538 int rd = extract32(insn, 0, 5);
13539 CryptoTwoOpFn *genfn;
13540 bool feature;
13541 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13543 if (size != 0) {
13544 unallocated_encoding(s);
13545 return;
13548 switch (opcode) {
13549 case 0: /* SHA1H */
13550 feature = dc_isar_feature(aa64_sha1, s);
13551 genfn = gen_helper_crypto_sha1h;
13552 break;
13553 case 1: /* SHA1SU1 */
13554 feature = dc_isar_feature(aa64_sha1, s);
13555 genfn = gen_helper_crypto_sha1su1;
13556 break;
13557 case 2: /* SHA256SU0 */
13558 feature = dc_isar_feature(aa64_sha256, s);
13559 genfn = gen_helper_crypto_sha256su0;
13560 break;
13561 default:
13562 unallocated_encoding(s);
13563 return;
13566 if (!feature) {
13567 unallocated_encoding(s);
13568 return;
13571 if (!fp_access_check(s)) {
13572 return;
13575 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13576 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13578 genfn(tcg_rd_ptr, tcg_rn_ptr);
13580 tcg_temp_free_ptr(tcg_rd_ptr);
13581 tcg_temp_free_ptr(tcg_rn_ptr);
13584 /* Crypto three-reg SHA512
13585 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
13586 * +-----------------------+------+---+---+-----+--------+------+------+
13587 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd |
13588 * +-----------------------+------+---+---+-----+--------+------+------+
13590 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13592 int opcode = extract32(insn, 10, 2);
13593 int o = extract32(insn, 14, 1);
13594 int rm = extract32(insn, 16, 5);
13595 int rn = extract32(insn, 5, 5);
13596 int rd = extract32(insn, 0, 5);
13597 bool feature;
13598 CryptoThreeOpFn *genfn;
13600 if (o == 0) {
13601 switch (opcode) {
13602 case 0: /* SHA512H */
13603 feature = dc_isar_feature(aa64_sha512, s);
13604 genfn = gen_helper_crypto_sha512h;
13605 break;
13606 case 1: /* SHA512H2 */
13607 feature = dc_isar_feature(aa64_sha512, s);
13608 genfn = gen_helper_crypto_sha512h2;
13609 break;
13610 case 2: /* SHA512SU1 */
13611 feature = dc_isar_feature(aa64_sha512, s);
13612 genfn = gen_helper_crypto_sha512su1;
13613 break;
13614 case 3: /* RAX1 */
13615 feature = dc_isar_feature(aa64_sha3, s);
13616 genfn = NULL;
13617 break;
13618 default:
13619 g_assert_not_reached();
13621 } else {
13622 switch (opcode) {
13623 case 0: /* SM3PARTW1 */
13624 feature = dc_isar_feature(aa64_sm3, s);
13625 genfn = gen_helper_crypto_sm3partw1;
13626 break;
13627 case 1: /* SM3PARTW2 */
13628 feature = dc_isar_feature(aa64_sm3, s);
13629 genfn = gen_helper_crypto_sm3partw2;
13630 break;
13631 case 2: /* SM4EKEY */
13632 feature = dc_isar_feature(aa64_sm4, s);
13633 genfn = gen_helper_crypto_sm4ekey;
13634 break;
13635 default:
13636 unallocated_encoding(s);
13637 return;
13641 if (!feature) {
13642 unallocated_encoding(s);
13643 return;
13646 if (!fp_access_check(s)) {
13647 return;
13650 if (genfn) {
13651 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13653 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13654 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13655 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13657 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13659 tcg_temp_free_ptr(tcg_rd_ptr);
13660 tcg_temp_free_ptr(tcg_rn_ptr);
13661 tcg_temp_free_ptr(tcg_rm_ptr);
13662 } else {
13663 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13664 int pass;
13666 tcg_op1 = tcg_temp_new_i64();
13667 tcg_op2 = tcg_temp_new_i64();
13668 tcg_res[0] = tcg_temp_new_i64();
13669 tcg_res[1] = tcg_temp_new_i64();
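/* Open-coded RAX1: for each 64-bit lane, Vd = Vn ^ rol64(Vm, 1). */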
13671 for (pass = 0; pass < 2; pass++) {
13672 read_vec_element(s, tcg_op1, rn, pass, MO_64);
13673 read_vec_element(s, tcg_op2, rm, pass, MO_64);
13675 tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13676 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13678 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13679 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13681 tcg_temp_free_i64(tcg_op1);
13682 tcg_temp_free_i64(tcg_op2);
13683 tcg_temp_free_i64(tcg_res[0]);
13684 tcg_temp_free_i64(tcg_res[1]);
13688 /* Crypto two-reg SHA512
13689 * 31 12 11 10 9 5 4 0
13690 * +-----------------------------------------+--------+------+------+
13691 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd |
13692 * +-----------------------------------------+--------+------+------+
13694 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13696 int opcode = extract32(insn, 10, 2);
13697 int rn = extract32(insn, 5, 5);
13698 int rd = extract32(insn, 0, 5);
13699 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13700 bool feature;
13701 CryptoTwoOpFn *genfn;
13703 switch (opcode) {
13704 case 0: /* SHA512SU0 */
13705 feature = dc_isar_feature(aa64_sha512, s);
13706 genfn = gen_helper_crypto_sha512su0;
13707 break;
13708 case 1: /* SM4E */
13709 feature = dc_isar_feature(aa64_sm4, s);
13710 genfn = gen_helper_crypto_sm4e;
13711 break;
13712 default:
13713 unallocated_encoding(s);
13714 return;
13717 if (!feature) {
13718 unallocated_encoding(s);
13719 return;
13722 if (!fp_access_check(s)) {
13723 return;
13726 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13727 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13729 genfn(tcg_rd_ptr, tcg_rn_ptr);
13731 tcg_temp_free_ptr(tcg_rd_ptr);
13732 tcg_temp_free_ptr(tcg_rn_ptr);
13735 /* Crypto four-register
13736 * 31 23 22 21 20 16 15 14 10 9 5 4 0
13737 * +-------------------+-----+------+---+------+------+------+
13738 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd |
13739 * +-------------------+-----+------+---+------+------+------+
13741 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13743 int op0 = extract32(insn, 21, 2);
13744 int rm = extract32(insn, 16, 5);
13745 int ra = extract32(insn, 10, 5);
13746 int rn = extract32(insn, 5, 5);
13747 int rd = extract32(insn, 0, 5);
13748 bool feature;
13750 switch (op0) {
13751 case 0: /* EOR3 */
13752 case 1: /* BCAX */
13753 feature = dc_isar_feature(aa64_sha3, s);
13754 break;
13755 case 2: /* SM3SS1 */
13756 feature = dc_isar_feature(aa64_sm3, s);
13757 break;
13758 default:
13759 unallocated_encoding(s);
13760 return;
13763 if (!feature) {
13764 unallocated_encoding(s);
13765 return;
13768 if (!fp_access_check(s)) {
13769 return;
13772 if (op0 < 2) {
13773 TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13774 int pass;
13776 tcg_op1 = tcg_temp_new_i64();
13777 tcg_op2 = tcg_temp_new_i64();
13778 tcg_op3 = tcg_temp_new_i64();
13779 tcg_res[0] = tcg_temp_new_i64();
13780 tcg_res[1] = tcg_temp_new_i64();
13782 for (pass = 0; pass < 2; pass++) {
13783 read_vec_element(s, tcg_op1, rn, pass, MO_64);
13784 read_vec_element(s, tcg_op2, rm, pass, MO_64);
13785 read_vec_element(s, tcg_op3, ra, pass, MO_64);
13787 if (op0 == 0) {
13788 /* EOR3 */
13789 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13790 } else {
13791 /* BCAX */
13792 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13794 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13796 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13797 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13799 tcg_temp_free_i64(tcg_op1);
13800 tcg_temp_free_i64(tcg_op2);
13801 tcg_temp_free_i64(tcg_op3);
13802 tcg_temp_free_i64(tcg_res[0]);
13803 tcg_temp_free_i64(tcg_res[1]);
13804 } else {
13805 TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13807 tcg_op1 = tcg_temp_new_i32();
13808 tcg_op2 = tcg_temp_new_i32();
13809 tcg_op3 = tcg_temp_new_i32();
13810 tcg_res = tcg_temp_new_i32();
13811 tcg_zero = tcg_const_i32(0);
13813 read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13814 read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13815 read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
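/*
 * SM3SS1: tcg_res = ror32(ror32(op1, 20) + op2 + op3, 25), i.e.
 * rol32(rol32(Vn, 12) + Vm + Va, 7); only element 3 of Vd gets the
 * result, the other elements are zeroed below.
 */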
13817 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13818 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13819 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13820 tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13822 write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13823 write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13824 write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13825 write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13827 tcg_temp_free_i32(tcg_op1);
13828 tcg_temp_free_i32(tcg_op2);
13829 tcg_temp_free_i32(tcg_op3);
13830 tcg_temp_free_i32(tcg_res);
13831 tcg_temp_free_i32(tcg_zero);
13835 /* Crypto XAR
13836 * 31 21 20 16 15 10 9 5 4 0
13837 * +-----------------------+------+--------+------+------+
13838 * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd |
13839 * +-----------------------+------+--------+------+------+
13841 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13843 int rm = extract32(insn, 16, 5);
13844 int imm6 = extract32(insn, 10, 6);
13845 int rn = extract32(insn, 5, 5);
13846 int rd = extract32(insn, 0, 5);
13847 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13848 int pass;
13850 if (!dc_isar_feature(aa64_sha3, s)) {
13851 unallocated_encoding(s);
13852 return;
13855 if (!fp_access_check(s)) {
13856 return;
13859 tcg_op1 = tcg_temp_new_i64();
13860 tcg_op2 = tcg_temp_new_i64();
13861 tcg_res[0] = tcg_temp_new_i64();
13862 tcg_res[1] = tcg_temp_new_i64();
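/* XAR: for each 64-bit lane, Vd = ror64(Vn ^ Vm, imm6). */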
13864 for (pass = 0; pass < 2; pass++) {
13865 read_vec_element(s, tcg_op1, rn, pass, MO_64);
13866 read_vec_element(s, tcg_op2, rm, pass, MO_64);
13868 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13869 tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13871 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13872 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13874 tcg_temp_free_i64(tcg_op1);
13875 tcg_temp_free_i64(tcg_op2);
13876 tcg_temp_free_i64(tcg_res[0]);
13877 tcg_temp_free_i64(tcg_res[1]);
13880 /* Crypto three-reg imm2
13881 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
13882 * +-----------------------+------+-----+------+--------+------+------+
13883 * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd |
13884 * +-----------------------+------+-----+------+--------+------+------+
13886 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13888 int opcode = extract32(insn, 10, 2);
13889 int imm2 = extract32(insn, 12, 2);
13890 int rm = extract32(insn, 16, 5);
13891 int rn = extract32(insn, 5, 5);
13892 int rd = extract32(insn, 0, 5);
13893 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13894 TCGv_i32 tcg_imm2, tcg_opcode;
13896 if (!dc_isar_feature(aa64_sm3, s)) {
13897 unallocated_encoding(s);
13898 return;
13901 if (!fp_access_check(s)) {
13902 return;
13905 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13906 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13907 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13908 tcg_imm2 = tcg_const_i32(imm2);
13909 tcg_opcode = tcg_const_i32(opcode);
13911 gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13912 tcg_opcode);
13914 tcg_temp_free_ptr(tcg_rd_ptr);
13915 tcg_temp_free_ptr(tcg_rn_ptr);
13916 tcg_temp_free_ptr(tcg_rm_ptr);
13917 tcg_temp_free_i32(tcg_imm2);
13918 tcg_temp_free_i32(tcg_opcode);
13921 /* C3.6 Data processing - SIMD, inc Crypto
13923 * As the decode gets a little complex we are using a table based
13924 * approach for this part of the decode.
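/*
 * Each entry is matched as (insn & mask) == pattern, in table order, so a
 * pattern that is a subset of another (e.g. simd_mod_imm vs simd_shift_imm)
 * must appear first.
 */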
13926 static const AArch64DecodeTable data_proc_simd[] = {
13927 /* pattern , mask , fn */
13928 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13929 { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13930 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13931 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13932 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13933 { 0x0e000400, 0x9fe08400, disas_simd_copy },
13934 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13935 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13936 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13937 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13938 { 0x0e000000, 0xbf208c00, disas_simd_tb },
13939 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13940 { 0x2e000000, 0xbf208400, disas_simd_ext },
13941 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13942 { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13943 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13944 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13945 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13946 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13947 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13948 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13949 { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13950 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13951 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13952 { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13953 { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13954 { 0xce000000, 0xff808000, disas_crypto_four_reg },
13955 { 0xce800000, 0xffe00000, disas_crypto_xar },
13956 { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13957 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13958 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13959 { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13960 { 0x00000000, 0x00000000, NULL }
13963 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13965 /* Note that this is called with all non-FP cases from
13966 * table C3-6 so it must UNDEF for entries not specifically
13967 * allocated to instructions in that table.
13969 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13970 if (fn) {
13971 fn(s, insn);
13972 } else {
13973 unallocated_encoding(s);
13977 /* C3.6 Data processing - SIMD and floating point */
13978 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13980 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13981 disas_data_proc_fp(s, insn);
13982 } else {
13983 /* SIMD, including crypto */
13984 disas_data_proc_simd(s, insn);
13989 * is_guarded_page:
13990 * @env: The cpu environment
13991 * @s: The DisasContext
13993 * Return true if the page is guarded.
13995 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13997 #ifdef CONFIG_USER_ONLY
13998 return false; /* FIXME */
13999 #else
14000 uint64_t addr = s->base.pc_first;
14001 int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
14002 unsigned int index = tlb_index(env, mmu_idx, addr);
14003 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
14006 * We test this immediately after reading an insn, which means
14007 * that any normal page must be in the TLB. The only exception
14008 * would be for executing from flash or device memory, which
14009 * does not retain the TLB entry.
14011 * FIXME: Assume false for those, for now. We could use
14012 * arm_cpu_get_phys_page_attrs_debug to re-read the page
14013 * table entry even for that case.
14015 return (tlb_hit(entry->addr_code, addr) &&
14016 env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
14017 #endif
14021 * btype_destination_ok:
14022 * @insn: The instruction at the branch destination
14023 * @bt: SCTLR_ELx.BT
14024 * @btype: PSTATE.BTYPE, and is non-zero
14026 * On a guarded page, there are a limited number of insns
14027 * that may be present at the branch target:
14028 * - branch target identifiers,
14029 * - paciasp, pacibsp,
14030 * - BRK insn
14031 * - HLT insn
14032 * Anything else causes a Branch Target Exception.
14034 * Return true if the branch is compatible, false to raise BTITRAP.
14036 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14038 if ((insn & 0xfffff01fu) == 0xd503201fu) {
14039 /* HINT space */
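/*
 * extract32(insn, 5, 7) is the hint number (CRm:op2); e.g. 25 for
 * PACIASP, 32 for BTI.
 */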
14040 switch (extract32(insn, 5, 7)) {
14041 case 0b011001: /* PACIASP */
14042 case 0b011011: /* PACIBSP */
14044 * If SCTLR_ELx.BT, then PACI*SP are not compatible
14045 * with btype == 3. Otherwise all btype are ok.
14047 return !bt || btype != 3;
14048 case 0b100000: /* BTI */
14049 /* Not compatible with any btype. */
14050 return false;
14051 case 0b100010: /* BTI c */
14052 /* Not compatible with btype == 3 */
14053 return btype != 3;
14054 case 0b100100: /* BTI j */
14055 /* Not compatible with btype == 2 */
14056 return btype != 2;
14057 case 0b100110: /* BTI jc */
14058 /* Compatible with any btype. */
14059 return true;
14061 } else {
14062 switch (insn & 0xffe0001fu) {
14063 case 0xd4200000u: /* BRK */
14064 case 0xd4400000u: /* HLT */
14065 /* Give priority to the breakpoint exception. */
14066 return true;
14069 return false;
14072 /* C3.1 A64 instruction index by encoding */
14073 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
14075 uint32_t insn;
14077 s->pc_curr = s->base.pc_next;
14078 insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
14079 s->insn = insn;
14080 s->base.pc_next += 4;
14082 s->fp_access_checked = false;
14084 if (dc_isar_feature(aa64_bti, s)) {
14085 if (s->base.num_insns == 1) {
14087 * At the first insn of the TB, compute s->guarded_page.
14088 * We delayed computing this until successfully reading
14089 * the first insn of the TB, above. This (mostly) ensures
14090 * that the softmmu tlb entry has been populated, and the
14091 * page table GP bit is available.
14093 * Note that we need to compute this even if btype == 0,
14094 * because this value is used for BR instructions later
14095 * where ENV is not available.
14097 s->guarded_page = is_guarded_page(env, s);
14099 /* First insn can have btype set to non-zero. */
14100 tcg_debug_assert(s->btype >= 0);
14103 * Note that the Branch Target Exception has fairly high
14104 * priority -- below debugging exceptions but above most
14105 * everything else. This allows us to handle this now
14106 * instead of waiting until the insn is otherwise decoded.
14108 if (s->btype != 0
14109 && s->guarded_page
14110 && !btype_destination_ok(insn, s->bt, s->btype)) {
14111 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
14112 syn_btitrap(s->btype),
14113 default_exception_el(s));
14114 return;
14116 } else {
14117 /* Not the first insn: btype must be 0. */
14118 tcg_debug_assert(s->btype == 0);
14122 switch (extract32(insn, 25, 4)) {
14123 case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
14124 unallocated_encoding(s);
14125 break;
14126 case 0x2:
14127 if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
14128 unallocated_encoding(s);
14130 break;
14131 case 0x8: case 0x9: /* Data processing - immediate */
14132 disas_data_proc_imm(s, insn);
14133 break;
14134 case 0xa: case 0xb: /* Branch, exception generation and system insns */
14135 disas_b_exc_sys(s, insn);
14136 break;
14137 case 0x4:
14138 case 0x6:
14139 case 0xc:
14140 case 0xe: /* Loads and stores */
14141 disas_ldst(s, insn);
14142 break;
14143 case 0x5:
14144 case 0xd: /* Data processing - register */
14145 disas_data_proc_reg(s, insn);
14146 break;
14147 case 0x7:
14148 case 0xf: /* Data processing - SIMD and floating point */
14149 disas_data_proc_simd_fp(s, insn);
14150 break;
14151 default:
14152 assert(FALSE); /* all 16 cases should be handled above */
14153 break;
14156 /* if we allocated any temporaries, free them here */
14157 free_tmp_a64(s);
14160 * After execution of most insns, btype is reset to 0.
14161 * Note that we set btype == -1 when the insn sets btype.
14163 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14164 reset_btype(s);
14168 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14169 CPUState *cpu)
14171 DisasContext *dc = container_of(dcbase, DisasContext, base);
14172 CPUARMState *env = cpu->env_ptr;
14173 ARMCPU *arm_cpu = env_archcpu(env);
14174 uint32_t tb_flags = dc->base.tb->flags;
14175 int bound, core_mmu_idx;
14177 dc->isar = &arm_cpu->isar;
14178 dc->condjmp = 0;
14180 dc->aarch64 = 1;
14181 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
14182 * there is no secure EL1, so we route exceptions to EL3.
14184 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
14185 !arm_el_is_aa64(env, 3);
14186 dc->thumb = 0;
14187 dc->sctlr_b = 0;
14188 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
14189 dc->condexec_mask = 0;
14190 dc->condexec_cond = 0;
14191 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
14192 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14193 dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
14194 dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
14195 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14196 #if !defined(CONFIG_USER_ONLY)
14197 dc->user = (dc->current_el == 0);
14198 #endif
14199 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
14200 dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
14201 dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
14202 dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
14203 dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
14204 dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
14205 dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV);
14206 dc->vec_len = 0;
14207 dc->vec_stride = 0;
14208 dc->cp_regs = arm_cpu->cp_regs;
14209 dc->features = env->features;
14211 /* Single step state. The code-generation logic here is:
14212 * SS_ACTIVE == 0:
14213 * generate code with no special handling for single-stepping (except
14214 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14215 * this happens anyway because those changes are all system register or
14216 * PSTATE writes).
14217 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14218 * emit code for one insn
14219 * emit code to clear PSTATE.SS
14220 * emit code to generate software step exception for completed step
14221 * end TB (as usual for having generated an exception)
14222 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14223 * emit code to generate a software step exception
14224 * end the TB
14226 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
14227 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
14228 dc->is_ldex = false;
14229 dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
14231 /* Bound the number of insns to execute to those left on the page. */
14232 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
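/*
 * -(pc_first | TARGET_PAGE_MASK) is the number of bytes from pc_first to
 * the end of its page, so dividing by 4 gives the number of insns left.
 */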
14234 /* If architectural single step active, limit to 1. */
14235 if (dc->ss_active) {
14236 bound = 1;
14238 dc->base.max_insns = MIN(dc->base.max_insns, bound);
14240 init_tmp_a64_array(dc);
14243 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14247 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14249 DisasContext *dc = container_of(dcbase, DisasContext, base);
14251 tcg_gen_insn_start(dc->base.pc_next, 0, 0);
14252 dc->insn_start = tcg_last_op();
14255 static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
14256 const CPUBreakpoint *bp)
14258 DisasContext *dc = container_of(dcbase, DisasContext, base);
14260 if (bp->flags & BP_CPU) {
14261 gen_a64_set_pc_im(dc->base.pc_next);
14262 gen_helper_check_breakpoints(cpu_env);
14263 /* End the TB early; it likely won't be executed */
14264 dc->base.is_jmp = DISAS_TOO_MANY;
14265 } else {
14266 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
14267 /* The address covered by the breakpoint must be
14268 included in [tb->pc, tb->pc + tb->size) in order
14269 for it to be properly cleared -- thus we
14270 increment the PC here so that the logic setting
14271 tb->size below does the right thing. */
14272 dc->base.pc_next += 4;
14273 dc->base.is_jmp = DISAS_NORETURN;
14276 return true;
14279 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14281 DisasContext *dc = container_of(dcbase, DisasContext, base);
14282 CPUARMState *env = cpu->env_ptr;
14284 if (dc->ss_active && !dc->pstate_ss) {
14285 /* Singlestep state is Active-pending.
14286 * If we're in this state at the start of a TB then either
14287 * a) we just took an exception to an EL which is being debugged
14288 * and this is the first insn in the exception handler
14289 * b) debug exceptions were masked and we just unmasked them
14290 * without changing EL (eg by clearing PSTATE.D)
14291 * In either case we're going to take a swstep exception in the
14292 * "did not step an insn" case, and so the syndrome ISV and EX
14293 * bits should be zero.
14295 assert(dc->base.num_insns == 1);
14296 gen_swstep_exception(dc, 0, 0);
14297 dc->base.is_jmp = DISAS_NORETURN;
14298 } else {
14299 disas_a64_insn(env, dc);
14302 translator_loop_temp_check(&dc->base);
14305 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14307 DisasContext *dc = container_of(dcbase, DisasContext, base);
14309 if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
14310 /* Note that this means single stepping WFI doesn't halt the CPU.
14311 * For conditional branch insns this is harmless unreachable code as
14312 * gen_goto_tb() has already handled emitting the debug exception
14313 * (and thus a tb-jump is not possible when singlestepping).
14315 switch (dc->base.is_jmp) {
14316 default:
14317 gen_a64_set_pc_im(dc->base.pc_next);
14318 /* fall through */
14319 case DISAS_EXIT:
14320 case DISAS_JUMP:
14321 if (dc->base.singlestep_enabled) {
14322 gen_exception_internal(EXCP_DEBUG);
14323 } else {
14324 gen_step_complete_exception(dc);
14326 break;
14327 case DISAS_NORETURN:
14328 break;
14330 } else {
14331 switch (dc->base.is_jmp) {
14332 case DISAS_NEXT:
14333 case DISAS_TOO_MANY:
14334 gen_goto_tb(dc, 1, dc->base.pc_next);
14335 break;
14336 default:
14337 case DISAS_UPDATE:
14338 gen_a64_set_pc_im(dc->base.pc_next);
14339 /* fall through */
14340 case DISAS_EXIT:
14341 tcg_gen_exit_tb(NULL, 0);
14342 break;
14343 case DISAS_JUMP:
14344 tcg_gen_lookup_and_goto_ptr();
14345 break;
14346 case DISAS_NORETURN:
14347 case DISAS_SWI:
14348 break;
14349 case DISAS_WFE:
14350 gen_a64_set_pc_im(dc->base.pc_next);
14351 gen_helper_wfe(cpu_env);
14352 break;
14353 case DISAS_YIELD:
14354 gen_a64_set_pc_im(dc->base.pc_next);
14355 gen_helper_yield(cpu_env);
14356 break;
14357 case DISAS_WFI:
14359 /* This is a special case because we don't want to just halt the CPU
14360 * if trying to debug across a WFI.
14362 TCGv_i32 tmp = tcg_const_i32(4);
14364 gen_a64_set_pc_im(dc->base.pc_next);
14365 gen_helper_wfi(cpu_env, tmp);
14366 tcg_temp_free_i32(tmp);
14367 /* The helper doesn't necessarily throw an exception, but we
14368 * must go back to the main loop to check for interrupts anyway.
14370 tcg_gen_exit_tb(NULL, 0);
14371 break;
14377 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14378 CPUState *cpu)
14380 DisasContext *dc = container_of(dcbase, DisasContext, base);
14382 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
14383 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
14386 const TranslatorOps aarch64_translator_ops = {
14387 .init_disas_context = aarch64_tr_init_disas_context,
14388 .tb_start = aarch64_tr_tb_start,
14389 .insn_start = aarch64_tr_insn_start,
14390 .breakpoint_check = aarch64_tr_breakpoint_check,
14391 .translate_insn = aarch64_tr_translate_insn,
14392 .tb_stop = aarch64_tr_tb_stop,
14393 .disas_log = aarch64_tr_disas_log,