target-arm/translate-a64.c
1 /*
2 * AArch64 translation
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include <stdarg.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <inttypes.h>
25 #include "cpu.h"
26 #include "tcg-op.h"
27 #include "qemu/log.h"
28 #include "arm_ldst.h"
29 #include "translate.h"
30 #include "internals.h"
31 #include "qemu/host-utils.h"
33 #include "exec/semihost.h"
34 #include "exec/gen-icount.h"
36 #include "exec/helper-proto.h"
37 #include "exec/helper-gen.h"
39 #include "trace-tcg.h"
41 static TCGv_i64 cpu_X[32];
42 static TCGv_i64 cpu_pc;
43 static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
45 /* Load/store exclusive handling */
46 static TCGv_i64 cpu_exclusive_addr;
47 static TCGv_i64 cpu_exclusive_val;
48 static TCGv_i64 cpu_exclusive_high;
49 #ifdef CONFIG_USER_ONLY
50 static TCGv_i64 cpu_exclusive_test;
51 static TCGv_i32 cpu_exclusive_info;
52 #endif
54 static const char *regnames[] = {
55 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
56 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
57 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
58 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
61 enum a64_shift_type {
62 A64_SHIFT_TYPE_LSL = 0,
63 A64_SHIFT_TYPE_LSR = 1,
64 A64_SHIFT_TYPE_ASR = 2,
65 A64_SHIFT_TYPE_ROR = 3
68 /* Table based decoder typedefs - used when the relevant bits for decode
69 * are too awkwardly scattered across the instruction (eg SIMD).
71 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
73 typedef struct AArch64DecodeTable {
74 uint32_t pattern;
75 uint32_t mask;
76 AArch64DecodeFn *disas_fn;
77 } AArch64DecodeTable;
79 /* Function prototype for gen_ functions for calling Neon helpers */
80 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
81 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
82 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
83 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
84 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
85 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
86 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
87 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
88 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
89 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
90 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
91 typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
92 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
94 /* initialize TCG globals. */
95 void a64_translate_init(void)
97 int i;
99 cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
100 offsetof(CPUARMState, pc),
101 "pc");
102 for (i = 0; i < 32; i++) {
103 cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
104 offsetof(CPUARMState, xregs[i]),
105 regnames[i]);
108 cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
109 cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
110 cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
111 cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
113 cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
114 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
115 cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
116 offsetof(CPUARMState, exclusive_val), "exclusive_val");
117 cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
118 offsetof(CPUARMState, exclusive_high), "exclusive_high");
119 #ifdef CONFIG_USER_ONLY
120 cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
121 offsetof(CPUARMState, exclusive_test), "exclusive_test");
122 cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
123 offsetof(CPUARMState, exclusive_info), "exclusive_info");
124 #endif
127 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
129 /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
130 * if EL1, access as if EL0; otherwise access at current EL
132 switch (s->mmu_idx) {
133 case ARMMMUIdx_S12NSE1:
134 return ARMMMUIdx_S12NSE0;
135 case ARMMMUIdx_S1SE1:
136 return ARMMMUIdx_S1SE0;
137 case ARMMMUIdx_S2NS:
138 g_assert_not_reached();
139 default:
140 return s->mmu_idx;
144 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
145 fprintf_function cpu_fprintf, int flags)
147 ARMCPU *cpu = ARM_CPU(cs);
148 CPUARMState *env = &cpu->env;
149 uint32_t psr = pstate_read(env);
150 int i;
152 cpu_fprintf(f, "PC=%016"PRIx64" SP=%016"PRIx64"\n",
153 env->pc, env->xregs[31]);
154 for (i = 0; i < 31; i++) {
155 cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
156 if ((i % 4) == 3) {
157 cpu_fprintf(f, "\n");
158 } else {
159 cpu_fprintf(f, " ");
162 cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
163 psr,
164 psr & PSTATE_N ? 'N' : '-',
165 psr & PSTATE_Z ? 'Z' : '-',
166 psr & PSTATE_C ? 'C' : '-',
167 psr & PSTATE_V ? 'V' : '-');
168 cpu_fprintf(f, "\n");
170 if (flags & CPU_DUMP_FPU) {
171 int numvfpregs = 32;
172 for (i = 0; i < numvfpregs; i += 2) {
173 uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
174 uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
175 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
176 i, vhi, vlo);
177 vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
178 vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
179 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
180 i + 1, vhi, vlo);
182 cpu_fprintf(f, "FPCR: %08x FPSR: %08x\n",
183 vfp_get_fpcr(env), vfp_get_fpsr(env));
187 void gen_a64_set_pc_im(uint64_t val)
189 tcg_gen_movi_i64(cpu_pc, val);
192 static void gen_exception_internal(int excp)
194 TCGv_i32 tcg_excp = tcg_const_i32(excp);
196 assert(excp_is_internal(excp));
197 gen_helper_exception_internal(cpu_env, tcg_excp);
198 tcg_temp_free_i32(tcg_excp);
201 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
203 TCGv_i32 tcg_excp = tcg_const_i32(excp);
204 TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
205 TCGv_i32 tcg_el = tcg_const_i32(target_el);
207 gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
208 tcg_syn, tcg_el);
209 tcg_temp_free_i32(tcg_el);
210 tcg_temp_free_i32(tcg_syn);
211 tcg_temp_free_i32(tcg_excp);
214 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
216 gen_a64_set_pc_im(s->pc - offset);
217 gen_exception_internal(excp);
218 s->is_jmp = DISAS_EXC;
221 static void gen_exception_insn(DisasContext *s, int offset, int excp,
222 uint32_t syndrome, uint32_t target_el)
224 gen_a64_set_pc_im(s->pc - offset);
225 gen_exception(excp, syndrome, target_el);
226 s->is_jmp = DISAS_EXC;
229 static void gen_ss_advance(DisasContext *s)
231 /* If the singlestep state is Active-not-pending, advance to
232 * Active-pending.
234 if (s->ss_active) {
235 s->pstate_ss = 0;
236 gen_helper_clear_pstate_ss(cpu_env);
240 static void gen_step_complete_exception(DisasContext *s)
242 /* We just completed step of an insn. Move from Active-not-pending
243 * to Active-pending, and then also take the swstep exception.
244 * This corresponds to making the (IMPDEF) choice to prioritize
245 * swstep exceptions over asynchronous exceptions taken to an exception
246 * level where debug is disabled. This choice has the advantage that
247 * we do not need to maintain internal state corresponding to the
248 * ISV/EX syndrome bits between completion of the step and generation
249 * of the exception, and our syndrome information is always correct.
251 gen_ss_advance(s);
252 gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
253 default_exception_el(s));
254 s->is_jmp = DISAS_EXC;
257 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
259 /* No direct tb linking with singlestep (either QEMU's or the ARM
260 * debug architecture kind) or deterministic io
262 if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
263 return false;
266 /* Only link tbs from inside the same guest page */
267 if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
268 return false;
271 return true;
274 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
276 TranslationBlock *tb;
278 tb = s->tb;
279 if (use_goto_tb(s, n, dest)) {
280 tcg_gen_goto_tb(n);
281 gen_a64_set_pc_im(dest);
282 tcg_gen_exit_tb((intptr_t)tb + n);
283 s->is_jmp = DISAS_TB_JUMP;
284 } else {
285 gen_a64_set_pc_im(dest);
286 if (s->ss_active) {
287 gen_step_complete_exception(s);
288 } else if (s->singlestep_enabled) {
289 gen_exception_internal(EXCP_DEBUG);
290 } else {
291 tcg_gen_exit_tb(0);
292 s->is_jmp = DISAS_TB_JUMP;
297 static void unallocated_encoding(DisasContext *s)
299 /* Unallocated and reserved encodings are uncategorized */
300 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
301 default_exception_el(s));
304 #define unsupported_encoding(s, insn) \
305 do { \
306 qemu_log_mask(LOG_UNIMP, \
307 "%s:%d: unsupported instruction encoding 0x%08x " \
308 "at pc=%016" PRIx64 "\n", \
309 __FILE__, __LINE__, insn, s->pc - 4); \
310 unallocated_encoding(s); \
311     } while (0)
313 static void init_tmp_a64_array(DisasContext *s)
315 #ifdef CONFIG_DEBUG_TCG
316 int i;
317 for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
318 TCGV_UNUSED_I64(s->tmp_a64[i]);
320 #endif
321 s->tmp_a64_count = 0;
324 static void free_tmp_a64(DisasContext *s)
326 int i;
327 for (i = 0; i < s->tmp_a64_count; i++) {
328 tcg_temp_free_i64(s->tmp_a64[i]);
330 init_tmp_a64_array(s);
333 static TCGv_i64 new_tmp_a64(DisasContext *s)
335 assert(s->tmp_a64_count < TMP_A64_MAX);
336 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
339 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
341 TCGv_i64 t = new_tmp_a64(s);
342 tcg_gen_movi_i64(t, 0);
343 return t;
347 * Register access functions
349  * These functions are used for directly accessing a register in cases where
350 * changes to the final register value are likely to be made. If you
351 * need to use a register for temporary calculation (e.g. index type
352 * operations) use the read_* form.
354 * B1.2.1 Register mappings
356 * In instruction register encoding 31 can refer to ZR (zero register) or
357 * the SP (stack pointer) depending on context. In QEMU's case we map SP
358 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
359 * This is the point of the _sp forms.
361 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
363 if (reg == 31) {
364 return new_tmp_a64_zero(s);
365 } else {
366 return cpu_X[reg];
370 /* register access for when 31 == SP */
371 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
373 return cpu_X[reg];
376 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
377 * representing the register contents. This TCGv is an auto-freed
378 * temporary so it need not be explicitly freed, and may be modified.
380 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
382 TCGv_i64 v = new_tmp_a64(s);
383 if (reg != 31) {
384 if (sf) {
385 tcg_gen_mov_i64(v, cpu_X[reg]);
386 } else {
387 tcg_gen_ext32u_i64(v, cpu_X[reg]);
389 } else {
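        /* Register 31 is XZR in this (non-SP) form: it reads as zero. */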
390 tcg_gen_movi_i64(v, 0);
392 return v;
395 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
397 TCGv_i64 v = new_tmp_a64(s);
398 if (sf) {
399 tcg_gen_mov_i64(v, cpu_X[reg]);
400 } else {
401 tcg_gen_ext32u_i64(v, cpu_X[reg]);
403 return v;
406 /* We should have at some point before trying to access an FP register
407 * done the necessary access check, so assert that
408 * (a) we did the check and
409 * (b) we didn't then just plough ahead anyway if it failed.
410 * Print the instruction pattern in the abort message so we can figure
411 * out what we need to fix if a user encounters this problem in the wild.
413 static inline void assert_fp_access_checked(DisasContext *s)
415 #ifdef CONFIG_DEBUG_TCG
416 if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
417 fprintf(stderr, "target-arm: FP access check missing for "
418 "instruction 0x%08x\n", s->insn);
419 abort();
421 #endif
424 /* Return the offset into CPUARMState of an element of specified
425 * size, 'element' places in from the least significant end of
426 * the FP/vector register Qn.
428 static inline int vec_reg_offset(DisasContext *s, int regno,
429 int element, TCGMemOp size)
431 int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
432 #ifdef HOST_WORDS_BIGENDIAN
433 /* This is complicated slightly because vfp.regs[2n] is
434 * still the low half and vfp.regs[2n+1] the high half
435 * of the 128 bit vector, even on big endian systems.
436 * Calculate the offset assuming a fully bigendian 128 bits,
437 * then XOR to account for the order of the two 64 bit halves.
439 offs += (16 - ((element + 1) * (1 << size)));
440 offs ^= 8;
441 #else
442 offs += element * (1 << size);
443 #endif
444 assert_fp_access_checked(s);
445 return offs;
448 /* Return the offset into CPUARMState of a slice (from
449 * the least significant end) of FP register Qn (ie
450 * Dn, Sn, Hn or Bn).
451 * (Note that this is not the same mapping as for A32; see cpu.h)
453 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
455 int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
456 #ifdef HOST_WORDS_BIGENDIAN
457 offs += (8 - (1 << size));
458 #endif
459 assert_fp_access_checked(s);
460 return offs;
463 /* Offset of the high half of the 128 bit vector Qn */
464 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
466 assert_fp_access_checked(s);
467 return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
470 /* Convenience accessors for reading and writing single and double
471 * FP registers. Writing clears the upper parts of the associated
472 * 128 bit vector register, as required by the architecture.
473 * Note that unlike the GP register accessors, the values returned
474 * by the read functions must be manually freed.
476 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
478 TCGv_i64 v = tcg_temp_new_i64();
480 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
481 return v;
484 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
486 TCGv_i32 v = tcg_temp_new_i32();
488 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
489 return v;
492 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
494 TCGv_i64 tcg_zero = tcg_const_i64(0);
496 tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
497 tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
498 tcg_temp_free_i64(tcg_zero);
501 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
503 TCGv_i64 tmp = tcg_temp_new_i64();
505 tcg_gen_extu_i32_i64(tmp, v);
506 write_fp_dreg(s, reg, tmp);
507 tcg_temp_free_i64(tmp);
510 static TCGv_ptr get_fpstatus_ptr(void)
512 TCGv_ptr statusptr = tcg_temp_new_ptr();
513 int offset;
515 /* In A64 all instructions (both FP and Neon) use the FPCR;
516 * there is no equivalent of the A32 Neon "standard FPSCR value"
517 * and all operations use vfp.fp_status.
519 offset = offsetof(CPUARMState, vfp.fp_status);
520 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
521 return statusptr;
524 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
525 * than the 32 bit equivalent.
527 static inline void gen_set_NZ64(TCGv_i64 result)
529 TCGv_i64 flag = tcg_temp_new_i64();
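    /* QEMU keeps Z such that the flag is set iff cpu_ZF == 0, and N as bit 31
     * of cpu_NF; hence the setcond below and the shift of the high half.
     */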
531 tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
532 tcg_gen_extrl_i64_i32(cpu_ZF, flag);
533 tcg_gen_shri_i64(flag, result, 32);
534 tcg_gen_extrl_i64_i32(cpu_NF, flag);
535 tcg_temp_free_i64(flag);
538 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
539 static inline void gen_logic_CC(int sf, TCGv_i64 result)
541 if (sf) {
542 gen_set_NZ64(result);
543 } else {
544 tcg_gen_extrl_i64_i32(cpu_ZF, result);
545 tcg_gen_extrl_i64_i32(cpu_NF, result);
547 tcg_gen_movi_i32(cpu_CF, 0);
548 tcg_gen_movi_i32(cpu_VF, 0);
551 /* dest = T0 + T1; compute C, N, V and Z flags */
552 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
554 if (sf) {
555 TCGv_i64 result, flag, tmp;
556 result = tcg_temp_new_i64();
557 flag = tcg_temp_new_i64();
558 tmp = tcg_temp_new_i64();
560 tcg_gen_movi_i64(tmp, 0);
561 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
563 tcg_gen_extrl_i64_i32(cpu_CF, flag);
565 gen_set_NZ64(result);
567 tcg_gen_xor_i64(flag, result, t0);
568 tcg_gen_xor_i64(tmp, t0, t1);
569 tcg_gen_andc_i64(flag, flag, tmp);
570 tcg_temp_free_i64(tmp);
571 tcg_gen_shri_i64(flag, flag, 32);
572 tcg_gen_extrl_i64_i32(cpu_VF, flag);
574 tcg_gen_mov_i64(dest, result);
575 tcg_temp_free_i64(result);
576 tcg_temp_free_i64(flag);
577 } else {
578 /* 32 bit arithmetic */
579 TCGv_i32 t0_32 = tcg_temp_new_i32();
580 TCGv_i32 t1_32 = tcg_temp_new_i32();
581 TCGv_i32 tmp = tcg_temp_new_i32();
583 tcg_gen_movi_i32(tmp, 0);
584 tcg_gen_extrl_i64_i32(t0_32, t0);
585 tcg_gen_extrl_i64_i32(t1_32, t1);
586 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
587 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
588 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
589 tcg_gen_xor_i32(tmp, t0_32, t1_32);
590 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
591 tcg_gen_extu_i32_i64(dest, cpu_NF);
593 tcg_temp_free_i32(tmp);
594 tcg_temp_free_i32(t0_32);
595 tcg_temp_free_i32(t1_32);
599 /* dest = T0 - T1; compute C, N, V and Z flags */
600 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
602 if (sf) {
603 /* 64 bit arithmetic */
604 TCGv_i64 result, flag, tmp;
606 result = tcg_temp_new_i64();
607 flag = tcg_temp_new_i64();
608 tcg_gen_sub_i64(result, t0, t1);
610 gen_set_NZ64(result);
612 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
613 tcg_gen_extrl_i64_i32(cpu_CF, flag);
615 tcg_gen_xor_i64(flag, result, t0);
616 tmp = tcg_temp_new_i64();
617 tcg_gen_xor_i64(tmp, t0, t1);
618 tcg_gen_and_i64(flag, flag, tmp);
619 tcg_temp_free_i64(tmp);
620 tcg_gen_shri_i64(flag, flag, 32);
621 tcg_gen_extrl_i64_i32(cpu_VF, flag);
622 tcg_gen_mov_i64(dest, result);
623 tcg_temp_free_i64(flag);
624 tcg_temp_free_i64(result);
625 } else {
626 /* 32 bit arithmetic */
627 TCGv_i32 t0_32 = tcg_temp_new_i32();
628 TCGv_i32 t1_32 = tcg_temp_new_i32();
629 TCGv_i32 tmp;
631 tcg_gen_extrl_i64_i32(t0_32, t0);
632 tcg_gen_extrl_i64_i32(t1_32, t1);
633 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
634 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
635 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
636 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
637 tmp = tcg_temp_new_i32();
638 tcg_gen_xor_i32(tmp, t0_32, t1_32);
639 tcg_temp_free_i32(t0_32);
640 tcg_temp_free_i32(t1_32);
641 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
642 tcg_temp_free_i32(tmp);
643 tcg_gen_extu_i32_i64(dest, cpu_NF);
647 /* dest = T0 + T1 + CF; do not compute flags. */
648 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
650 TCGv_i64 flag = tcg_temp_new_i64();
651 tcg_gen_extu_i32_i64(flag, cpu_CF);
652 tcg_gen_add_i64(dest, t0, t1);
653 tcg_gen_add_i64(dest, dest, flag);
654 tcg_temp_free_i64(flag);
656 if (!sf) {
657 tcg_gen_ext32u_i64(dest, dest);
661 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
662 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
664 if (sf) {
665 TCGv_i64 result, cf_64, vf_64, tmp;
666 result = tcg_temp_new_i64();
667 cf_64 = tcg_temp_new_i64();
668 vf_64 = tcg_temp_new_i64();
669 tmp = tcg_const_i64(0);
671 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
672 tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
673 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
674 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
675 gen_set_NZ64(result);
677 tcg_gen_xor_i64(vf_64, result, t0);
678 tcg_gen_xor_i64(tmp, t0, t1);
679 tcg_gen_andc_i64(vf_64, vf_64, tmp);
680 tcg_gen_shri_i64(vf_64, vf_64, 32);
681 tcg_gen_extrl_i64_i32(cpu_VF, vf_64);
683 tcg_gen_mov_i64(dest, result);
685 tcg_temp_free_i64(tmp);
686 tcg_temp_free_i64(vf_64);
687 tcg_temp_free_i64(cf_64);
688 tcg_temp_free_i64(result);
689 } else {
690 TCGv_i32 t0_32, t1_32, tmp;
691 t0_32 = tcg_temp_new_i32();
692 t1_32 = tcg_temp_new_i32();
693 tmp = tcg_const_i32(0);
695 tcg_gen_extrl_i64_i32(t0_32, t0);
696 tcg_gen_extrl_i64_i32(t1_32, t1);
697 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
698 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
700 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
701 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
702 tcg_gen_xor_i32(tmp, t0_32, t1_32);
703 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
704 tcg_gen_extu_i32_i64(dest, cpu_NF);
706 tcg_temp_free_i32(tmp);
707 tcg_temp_free_i32(t1_32);
708 tcg_temp_free_i32(t0_32);
713 * Load/Store generators
717 * Store from GPR register to memory.
719 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
720 TCGv_i64 tcg_addr, int size, int memidx)
722 g_assert(size <= 3);
723 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
726 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
727 TCGv_i64 tcg_addr, int size)
729 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
733 * Load from memory to GPR register
735 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
736 int size, bool is_signed, bool extend, int memidx)
738 TCGMemOp memop = MO_TE + size;
740 g_assert(size <= 3);
742 if (is_signed) {
743 memop += MO_SIGN;
746 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
748 if (extend && is_signed) {
749 g_assert(size < 3);
750 tcg_gen_ext32u_i64(dest, dest);
754 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
755 int size, bool is_signed, bool extend)
757 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
758 get_mem_index(s));
762 * Store from FP register to memory
764 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
766 /* This writes the bottom N bits of a 128 bit wide vector to memory */
767 TCGv_i64 tmp = tcg_temp_new_i64();
768 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
769 if (size < 4) {
770 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
771 } else {
772 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
773 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
774 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
775 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
776 tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
777 tcg_temp_free_i64(tcg_hiaddr);
780 tcg_temp_free_i64(tmp);
784 * Load from memory to FP register
786 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
788 /* This always zero-extends and writes to a full 128 bit wide vector */
789 TCGv_i64 tmplo = tcg_temp_new_i64();
790 TCGv_i64 tmphi;
792 if (size < 4) {
793 TCGMemOp memop = MO_TE + size;
794 tmphi = tcg_const_i64(0);
795 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
796 } else {
797 TCGv_i64 tcg_hiaddr;
798 tmphi = tcg_temp_new_i64();
799 tcg_hiaddr = tcg_temp_new_i64();
801 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
802 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
803 tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
804 tcg_temp_free_i64(tcg_hiaddr);
807 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
808 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
810 tcg_temp_free_i64(tmplo);
811 tcg_temp_free_i64(tmphi);
815 * Vector load/store helpers.
817 * The principal difference between this and a FP load is that we don't
818 * zero extend as we are filling a partial chunk of the vector register.
819 * These functions don't support 128 bit loads/stores, which would be
820 * normal load/store operations.
822 * The _i32 versions are useful when operating on 32 bit quantities
823 * (eg for floating point single or using Neon helper functions).
826 /* Get value of an element within a vector register */
827 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
828 int element, TCGMemOp memop)
830 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
831 switch (memop) {
832 case MO_8:
833 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
834 break;
835 case MO_16:
836 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
837 break;
838 case MO_32:
839 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
840 break;
841 case MO_8|MO_SIGN:
842 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
843 break;
844 case MO_16|MO_SIGN:
845 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
846 break;
847 case MO_32|MO_SIGN:
848 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
849 break;
850 case MO_64:
851 case MO_64|MO_SIGN:
852 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
853 break;
854 default:
855 g_assert_not_reached();
859 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
860 int element, TCGMemOp memop)
862 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
863 switch (memop) {
864 case MO_8:
865 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
866 break;
867 case MO_16:
868 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
869 break;
870 case MO_8|MO_SIGN:
871 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
872 break;
873 case MO_16|MO_SIGN:
874 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
875 break;
876 case MO_32:
877 case MO_32|MO_SIGN:
878 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
879 break;
880 default:
881 g_assert_not_reached();
885 /* Set value of an element within a vector register */
886 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
887 int element, TCGMemOp memop)
889 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
890 switch (memop) {
891 case MO_8:
892 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
893 break;
894 case MO_16:
895 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
896 break;
897 case MO_32:
898 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
899 break;
900 case MO_64:
901 tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
902 break;
903 default:
904 g_assert_not_reached();
908 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
909 int destidx, int element, TCGMemOp memop)
911 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
912 switch (memop) {
913 case MO_8:
914 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
915 break;
916 case MO_16:
917 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
918 break;
919 case MO_32:
920 tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
921 break;
922 default:
923 g_assert_not_reached();
927 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
928 * vector ops all need to do this).
930 static void clear_vec_high(DisasContext *s, int rd)
932 TCGv_i64 tcg_zero = tcg_const_i64(0);
934 write_vec_element(s, tcg_zero, rd, 1, MO_64);
935 tcg_temp_free_i64(tcg_zero);
938 /* Store from vector register to memory */
939 static void do_vec_st(DisasContext *s, int srcidx, int element,
940 TCGv_i64 tcg_addr, int size)
942 TCGMemOp memop = MO_TE + size;
943 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
945 read_vec_element(s, tcg_tmp, srcidx, element, size);
946 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
948 tcg_temp_free_i64(tcg_tmp);
951 /* Load from memory to vector register */
952 static void do_vec_ld(DisasContext *s, int destidx, int element,
953 TCGv_i64 tcg_addr, int size)
955 TCGMemOp memop = MO_TE + size;
956 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
958 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
959 write_vec_element(s, tcg_tmp, destidx, element, size);
961 tcg_temp_free_i64(tcg_tmp);
964 /* Check that FP/Neon access is enabled. If it is, return
965 * true. If not, emit code to generate an appropriate exception,
966 * and return false; the caller should not emit any code for
967 * the instruction. Note that this check must happen after all
968 * unallocated-encoding checks (otherwise the syndrome information
969 * for the resulting exception will be incorrect).
971 static inline bool fp_access_check(DisasContext *s)
973 assert(!s->fp_access_checked);
974 s->fp_access_checked = true;
976 if (!s->fp_excp_el) {
977 return true;
980 gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
981 s->fp_excp_el);
982 return false;
986 * This utility function is for doing register extension with an
987 * optional shift. You will likely want to pass a temporary for the
988 * destination register. See DecodeRegExtend() in the ARM ARM.
990 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
991 int option, unsigned int shift)
993 int extsize = extract32(option, 0, 2);
994 bool is_signed = extract32(option, 2, 1);
996 if (is_signed) {
997 switch (extsize) {
998 case 0:
999 tcg_gen_ext8s_i64(tcg_out, tcg_in);
1000 break;
1001 case 1:
1002 tcg_gen_ext16s_i64(tcg_out, tcg_in);
1003 break;
1004 case 2:
1005 tcg_gen_ext32s_i64(tcg_out, tcg_in);
1006 break;
1007 case 3:
1008 tcg_gen_mov_i64(tcg_out, tcg_in);
1009 break;
1011 } else {
1012 switch (extsize) {
1013 case 0:
1014 tcg_gen_ext8u_i64(tcg_out, tcg_in);
1015 break;
1016 case 1:
1017 tcg_gen_ext16u_i64(tcg_out, tcg_in);
1018 break;
1019 case 2:
1020 tcg_gen_ext32u_i64(tcg_out, tcg_in);
1021 break;
1022 case 3:
1023 tcg_gen_mov_i64(tcg_out, tcg_in);
1024 break;
1028 if (shift) {
1029 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1033 static inline void gen_check_sp_alignment(DisasContext *s)
1035 /* The AArch64 architecture mandates that (if enabled via PSTATE
1036 * or SCTLR bits) there is a check that SP is 16-aligned on every
1037 * SP-relative load or store (with an exception generated if it is not).
1038 * In line with general QEMU practice regarding misaligned accesses,
1039 * we omit these checks for the sake of guest program performance.
1040 * This function is provided as a hook so we can more easily add these
1041 * checks in future (possibly as a "favour catching guest program bugs
1042 * over speed" user selectable option).
1047  * This provides a simple table based lookup decoder. It is
1048 * intended to be used when the relevant bits for decode are too
1049 * awkwardly placed and switch/if based logic would be confusing and
1050 * deeply nested. Since it's a linear search through the table, tables
1051 * should be kept small.
1053 * It returns the first handler where insn & mask == pattern, or
1054 * NULL if there is no match.
1055 * The table is terminated by an empty mask (i.e. 0)
1057 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1058 uint32_t insn)
1060 const AArch64DecodeTable *tptr = table;
1062 while (tptr->mask) {
1063 if ((insn & tptr->mask) == tptr->pattern) {
1064 return tptr->disas_fn;
1066 tptr++;
1068 return NULL;
1072 * the instruction disassembly implemented here matches
1073 * the instruction encoding classifications in chapter 3 (C3)
1074 * of the ARM Architecture Reference Manual (DDI0487A_a)
1077 /* C3.2.7 Unconditional branch (immediate)
1078 * 31 30 26 25 0
1079 * +----+-----------+-------------------------------------+
1080 * | op | 0 0 1 0 1 | imm26 |
1081 * +----+-----------+-------------------------------------+
1083 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1085 uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1087 if (insn & (1U << 31)) {
1088 /* C5.6.26 BL Branch with link */
1089 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
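        /* s->pc has already been advanced past this insn, so X30 (the link
         * register) gets the address of the following instruction.
         */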
1092 /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1093 gen_goto_tb(s, 0, addr);
1096 /* C3.2.1 Compare & branch (immediate)
1097 * 31 30 25 24 23 5 4 0
1098 * +----+-------------+----+---------------------+--------+
1099 * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
1100 * +----+-------------+----+---------------------+--------+
1102 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1104 unsigned int sf, op, rt;
1105 uint64_t addr;
1106 TCGLabel *label_match;
1107 TCGv_i64 tcg_cmp;
1109 sf = extract32(insn, 31, 1);
1110 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1111 rt = extract32(insn, 0, 5);
1112 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1114 tcg_cmp = read_cpu_reg(s, rt, sf);
1115 label_match = gen_new_label();
1117 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1118 tcg_cmp, 0, label_match);
1120 gen_goto_tb(s, 0, s->pc);
1121 gen_set_label(label_match);
1122 gen_goto_tb(s, 1, addr);
1125 /* C3.2.5 Test & branch (immediate)
1126 * 31 30 25 24 23 19 18 5 4 0
1127 * +----+-------------+----+-------+-------------+------+
1128 * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
1129 * +----+-------------+----+-------+-------------+------+
1131 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1133 unsigned int bit_pos, op, rt;
1134 uint64_t addr;
1135 TCGLabel *label_match;
1136 TCGv_i64 tcg_cmp;
1138 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
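    /* bit_pos is the tested bit number (0..63), formed by concatenating the
     * b5 and b40 fields shown in the encoding diagram above.
     */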
1139 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1140 addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1141 rt = extract32(insn, 0, 5);
1143 tcg_cmp = tcg_temp_new_i64();
1144 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1145 label_match = gen_new_label();
1146 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1147 tcg_cmp, 0, label_match);
1148 tcg_temp_free_i64(tcg_cmp);
1149 gen_goto_tb(s, 0, s->pc);
1150 gen_set_label(label_match);
1151 gen_goto_tb(s, 1, addr);
1154 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1155 * 31 25 24 23 5 4 3 0
1156 * +---------------+----+---------------------+----+------+
1157 * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
1158 * +---------------+----+---------------------+----+------+
1160 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1162 unsigned int cond;
1163 uint64_t addr;
1165 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1166 unallocated_encoding(s);
1167 return;
1169 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1170 cond = extract32(insn, 0, 4);
1172 if (cond < 0x0e) {
1173 /* genuinely conditional branches */
1174 TCGLabel *label_match = gen_new_label();
1175 arm_gen_test_cc(cond, label_match);
1176 gen_goto_tb(s, 0, s->pc);
1177 gen_set_label(label_match);
1178 gen_goto_tb(s, 1, addr);
1179 } else {
1180 /* 0xe and 0xf are both "always" conditions */
1181 gen_goto_tb(s, 0, addr);
1185 /* C5.6.68 HINT */
1186 static void handle_hint(DisasContext *s, uint32_t insn,
1187 unsigned int op1, unsigned int op2, unsigned int crm)
1189 unsigned int selector = crm << 3 | op2;
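    /* CRm:op2 selects the hint; the switch below handles 0 NOP, 1 YIELD,
     * 2 WFE, 3 WFI, 4 SEV and 5 SEVL.
     */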
1191 if (op1 != 3) {
1192 unallocated_encoding(s);
1193 return;
1196 switch (selector) {
1197 case 0: /* NOP */
1198 return;
1199 case 3: /* WFI */
1200 s->is_jmp = DISAS_WFI;
1201 return;
1202 case 1: /* YIELD */
1203 s->is_jmp = DISAS_YIELD;
1204 return;
1205 case 2: /* WFE */
1206 s->is_jmp = DISAS_WFE;
1207 return;
1208 case 4: /* SEV */
1209 case 5: /* SEVL */
1210 /* we treat all as NOP at least for now */
1211 return;
1212 default:
1213 /* default specified as NOP equivalent */
1214 return;
1218 static void gen_clrex(DisasContext *s, uint32_t insn)
1220 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1223 /* CLREX, DSB, DMB, ISB */
1224 static void handle_sync(DisasContext *s, uint32_t insn,
1225 unsigned int op1, unsigned int op2, unsigned int crm)
1227 if (op1 != 3) {
1228 unallocated_encoding(s);
1229 return;
1232 switch (op2) {
1233 case 2: /* CLREX */
1234 gen_clrex(s, insn);
1235 return;
1236 case 4: /* DSB */
1237 case 5: /* DMB */
1238 case 6: /* ISB */
1239 /* We don't emulate caches so barriers are no-ops */
1240 return;
1241 default:
1242 unallocated_encoding(s);
1243 return;
1247 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1248 static void handle_msr_i(DisasContext *s, uint32_t insn,
1249 unsigned int op1, unsigned int op2, unsigned int crm)
1251 int op = op1 << 3 | op2;
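    /* op1:op2 selects the PSTATE field being written: 0x05 SPSel,
     * 0x1e DAIFSet, 0x1f DAIFClear (see the cases below).
     */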
1252 switch (op) {
1253 case 0x05: /* SPSel */
1254 if (s->current_el == 0) {
1255 unallocated_encoding(s);
1256 return;
1258 /* fall through */
1259 case 0x1e: /* DAIFSet */
1260 case 0x1f: /* DAIFClear */
1262 TCGv_i32 tcg_imm = tcg_const_i32(crm);
1263 TCGv_i32 tcg_op = tcg_const_i32(op);
1264 gen_a64_set_pc_im(s->pc - 4);
1265 gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1266 tcg_temp_free_i32(tcg_imm);
1267 tcg_temp_free_i32(tcg_op);
1268 s->is_jmp = DISAS_UPDATE;
1269 break;
1271 default:
1272 unallocated_encoding(s);
1273 return;
1277 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1279 TCGv_i32 tmp = tcg_temp_new_i32();
1280 TCGv_i32 nzcv = tcg_temp_new_i32();
1282 /* build bit 31, N */
1283 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1284 /* build bit 30, Z */
1285 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1286 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1287 /* build bit 29, C */
1288 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1289 /* build bit 28, V */
1290 tcg_gen_shri_i32(tmp, cpu_VF, 31);
1291 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1292 /* generate result */
1293 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1295 tcg_temp_free_i32(nzcv);
1296 tcg_temp_free_i32(tmp);
1299 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1302 TCGv_i32 nzcv = tcg_temp_new_i32();
1304 /* take NZCV from R[t] */
1305 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1307 /* bit 31, N */
1308 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1309 /* bit 30, Z */
1310 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1311 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1312 /* bit 29, C */
1313 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1314 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1315 /* bit 28, V */
1316 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1317 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1318 tcg_temp_free_i32(nzcv);
1321 /* C5.6.129 MRS - move from system register
1322 * C5.6.131 MSR (register) - move to system register
1323 * C5.6.204 SYS
1324 * C5.6.205 SYSL
1325 * These are all essentially the same insn in 'read' and 'write'
1326 * versions, with varying op0 fields.
1328 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1329 unsigned int op0, unsigned int op1, unsigned int op2,
1330 unsigned int crn, unsigned int crm, unsigned int rt)
1332 const ARMCPRegInfo *ri;
1333 TCGv_i64 tcg_rt;
1335 ri = get_arm_cp_reginfo(s->cp_regs,
1336 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1337 crn, crm, op0, op1, op2));
1339 if (!ri) {
1340 /* Unknown register; this might be a guest error or a QEMU
1341 * unimplemented feature.
1343 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1344 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1345 isread ? "read" : "write", op0, op1, crn, crm, op2);
1346 unallocated_encoding(s);
1347 return;
1350 /* Check access permissions */
1351 if (!cp_access_ok(s->current_el, ri, isread)) {
1352 unallocated_encoding(s);
1353 return;
1356 if (ri->accessfn) {
1357 /* Emit code to perform further access permissions checks at
1358 * runtime; this may result in an exception.
1360 TCGv_ptr tmpptr;
1361 TCGv_i32 tcg_syn;
1362 uint32_t syndrome;
1364 gen_a64_set_pc_im(s->pc - 4);
1365 tmpptr = tcg_const_ptr(ri);
1366 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1367 tcg_syn = tcg_const_i32(syndrome);
1368 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
1369 tcg_temp_free_ptr(tmpptr);
1370 tcg_temp_free_i32(tcg_syn);
1373 /* Handle special cases first */
1374 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1375 case ARM_CP_NOP:
1376 return;
1377 case ARM_CP_NZCV:
1378 tcg_rt = cpu_reg(s, rt);
1379 if (isread) {
1380 gen_get_nzcv(tcg_rt);
1381 } else {
1382 gen_set_nzcv(tcg_rt);
1384 return;
1385 case ARM_CP_CURRENTEL:
1386 /* Reads as current EL value from pstate, which is
1387 * guaranteed to be constant by the tb flags.
1389 tcg_rt = cpu_reg(s, rt);
1390 tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1391 return;
1392 case ARM_CP_DC_ZVA:
1393 /* Writes clear the aligned block of memory which rt points into. */
1394 tcg_rt = cpu_reg(s, rt);
1395 gen_helper_dc_zva(cpu_env, tcg_rt);
1396 return;
1397 default:
1398 break;
1401 if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1402 gen_io_start();
1405 tcg_rt = cpu_reg(s, rt);
1407 if (isread) {
1408 if (ri->type & ARM_CP_CONST) {
1409 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1410 } else if (ri->readfn) {
1411 TCGv_ptr tmpptr;
1412 tmpptr = tcg_const_ptr(ri);
1413 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1414 tcg_temp_free_ptr(tmpptr);
1415 } else {
1416 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1418 } else {
1419 if (ri->type & ARM_CP_CONST) {
1420 /* If not forbidden by access permissions, treat as WI */
1421 return;
1422 } else if (ri->writefn) {
1423 TCGv_ptr tmpptr;
1424 tmpptr = tcg_const_ptr(ri);
1425 gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1426 tcg_temp_free_ptr(tmpptr);
1427 } else {
1428 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1432 if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1433 /* I/O operations must end the TB here (whether read or write) */
1434 gen_io_end();
1435 s->is_jmp = DISAS_UPDATE;
1436 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1437 /* We default to ending the TB on a coprocessor register write,
1438 * but allow this to be suppressed by the register definition
1439 * (usually only necessary to work around guest bugs).
1441 s->is_jmp = DISAS_UPDATE;
1445 /* C3.2.4 System
1446 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1447 * +---------------------+---+-----+-----+-------+-------+-----+------+
1448 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1449 * +---------------------+---+-----+-----+-------+-------+-----+------+
1451 static void disas_system(DisasContext *s, uint32_t insn)
1453 unsigned int l, op0, op1, crn, crm, op2, rt;
1454 l = extract32(insn, 21, 1);
1455 op0 = extract32(insn, 19, 2);
1456 op1 = extract32(insn, 16, 3);
1457 crn = extract32(insn, 12, 4);
1458 crm = extract32(insn, 8, 4);
1459 op2 = extract32(insn, 5, 3);
1460 rt = extract32(insn, 0, 5);
1462 if (op0 == 0) {
1463 if (l || rt != 31) {
1464 unallocated_encoding(s);
1465 return;
1467 switch (crn) {
1468 case 2: /* C5.6.68 HINT */
1469 handle_hint(s, insn, op1, op2, crm);
1470 break;
1471 case 3: /* CLREX, DSB, DMB, ISB */
1472 handle_sync(s, insn, op1, op2, crm);
1473 break;
1474 case 4: /* C5.6.130 MSR (immediate) */
1475 handle_msr_i(s, insn, op1, op2, crm);
1476 break;
1477 default:
1478 unallocated_encoding(s);
1479 break;
1481 return;
1483 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1486 /* C3.2.3 Exception generation
1488 * 31 24 23 21 20 5 4 2 1 0
1489 * +-----------------+-----+------------------------+-----+----+
1490 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
1491 * +-----------------------+------------------------+----------+
1493 static void disas_exc(DisasContext *s, uint32_t insn)
1495 int opc = extract32(insn, 21, 3);
1496 int op2_ll = extract32(insn, 0, 5);
1497 int imm16 = extract32(insn, 5, 16);
1498 TCGv_i32 tmp;
1500 switch (opc) {
1501 case 0:
1502 /* For SVC, HVC and SMC we advance the single-step state
1503 * machine before taking the exception. This is architecturally
1504 * mandated, to ensure that single-stepping a system call
1505 * instruction works properly.
1507 switch (op2_ll) {
1508 case 1:
1509 gen_ss_advance(s);
1510 gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1511 default_exception_el(s));
1512 break;
1513 case 2:
1514 if (s->current_el == 0) {
1515 unallocated_encoding(s);
1516 break;
1518 /* The pre HVC helper handles cases when HVC gets trapped
1519 * as an undefined insn by runtime configuration.
1521 gen_a64_set_pc_im(s->pc - 4);
1522 gen_helper_pre_hvc(cpu_env);
1523 gen_ss_advance(s);
1524 gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1525 break;
1526 case 3:
1527 if (s->current_el == 0) {
1528 unallocated_encoding(s);
1529 break;
1531 gen_a64_set_pc_im(s->pc - 4);
1532 tmp = tcg_const_i32(syn_aa64_smc(imm16));
1533 gen_helper_pre_smc(cpu_env, tmp);
1534 tcg_temp_free_i32(tmp);
1535 gen_ss_advance(s);
1536 gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1537 break;
1538 default:
1539 unallocated_encoding(s);
1540 break;
1542 break;
1543 case 1:
1544 if (op2_ll != 0) {
1545 unallocated_encoding(s);
1546 break;
1548 /* BRK */
1549 gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1550 default_exception_el(s));
1551 break;
1552 case 2:
1553 if (op2_ll != 0) {
1554 unallocated_encoding(s);
1555 break;
1557 /* HLT. This has two purposes.
1558 * Architecturally, it is an external halting debug instruction.
1559          * Since QEMU doesn't implement external debug, we treat this as
1560          * the architecture requires when halting debug is disabled: it will UNDEF.
1561 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1563 if (semihosting_enabled() && imm16 == 0xf000) {
1564 #ifndef CONFIG_USER_ONLY
1565 /* In system mode, don't allow userspace access to semihosting,
1566 * to provide some semblance of security (and for consistency
1567 * with our 32-bit semihosting).
1569 if (s->current_el == 0) {
1570 unsupported_encoding(s, insn);
1571 break;
1573 #endif
1574 gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1575 } else {
1576 unsupported_encoding(s, insn);
1578 break;
1579 case 5:
1580 if (op2_ll < 1 || op2_ll > 3) {
1581 unallocated_encoding(s);
1582 break;
1584 /* DCPS1, DCPS2, DCPS3 */
1585 unsupported_encoding(s, insn);
1586 break;
1587 default:
1588 unallocated_encoding(s);
1589 break;
1593 /* C3.2.7 Unconditional branch (register)
1594 * 31 25 24 21 20 16 15 10 9 5 4 0
1595 * +---------------+-------+-------+-------+------+-------+
1596 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1597 * +---------------+-------+-------+-------+------+-------+
1599 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1601 unsigned int opc, op2, op3, rn, op4;
1603 opc = extract32(insn, 21, 4);
1604 op2 = extract32(insn, 16, 5);
1605 op3 = extract32(insn, 10, 6);
1606 rn = extract32(insn, 5, 5);
1607 op4 = extract32(insn, 0, 5);
1609 if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1610 unallocated_encoding(s);
1611 return;
1614 switch (opc) {
1615 case 0: /* BR */
1616 case 2: /* RET */
1617 tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1618 break;
1619 case 1: /* BLR */
1620 tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1621 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1622 break;
1623 case 4: /* ERET */
1624 if (s->current_el == 0) {
1625 unallocated_encoding(s);
1626 return;
1628 gen_helper_exception_return(cpu_env);
1629 s->is_jmp = DISAS_JUMP;
1630 return;
1631 case 5: /* DRPS */
1632 if (rn != 0x1f) {
1633 unallocated_encoding(s);
1634 } else {
1635 unsupported_encoding(s, insn);
1637 return;
1638 default:
1639 unallocated_encoding(s);
1640 return;
1643 s->is_jmp = DISAS_JUMP;
1646 /* C3.2 Branches, exception generating and system instructions */
1647 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1649 switch (extract32(insn, 25, 7)) {
1650 case 0x0a: case 0x0b:
1651 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1652 disas_uncond_b_imm(s, insn);
1653 break;
1654 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1655 disas_comp_b_imm(s, insn);
1656 break;
1657 case 0x1b: case 0x5b: /* Test & branch (immediate) */
1658 disas_test_b_imm(s, insn);
1659 break;
1660 case 0x2a: /* Conditional branch (immediate) */
1661 disas_cond_b_imm(s, insn);
1662 break;
1663 case 0x6a: /* Exception generation / System */
1664 if (insn & (1 << 24)) {
1665 disas_system(s, insn);
1666 } else {
1667 disas_exc(s, insn);
1669 break;
1670 case 0x6b: /* Unconditional branch (register) */
1671 disas_uncond_b_reg(s, insn);
1672 break;
1673 default:
1674 unallocated_encoding(s);
1675 break;
1680 * Load/Store exclusive instructions are implemented by remembering
1681 * the value/address loaded, and seeing if these are the same
1682 * when the store is performed. This is not actually the architecturally
1683 * mandated semantics, but it works for typical guest code sequences
1684 * and avoids having to monitor regular stores.
1686 * In system emulation mode only one CPU will be running at once, so
1687 * this sequence is effectively atomic. In user emulation mode we
1688 * throw an exception and handle the atomic operation elsewhere.
1690 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1691 TCGv_i64 addr, int size, bool is_pair)
1693 TCGv_i64 tmp = tcg_temp_new_i64();
1694 TCGMemOp memop = MO_TE + size;
1696 g_assert(size <= 3);
1697 tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1699 if (is_pair) {
1700 TCGv_i64 addr2 = tcg_temp_new_i64();
1701 TCGv_i64 hitmp = tcg_temp_new_i64();
1703 g_assert(size >= 2);
1704 tcg_gen_addi_i64(addr2, addr, 1 << size);
1705 tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1706 tcg_temp_free_i64(addr2);
1707 tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1708 tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1709 tcg_temp_free_i64(hitmp);
1712 tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1713 tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1715 tcg_temp_free_i64(tmp);
1716 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1719 #ifdef CONFIG_USER_ONLY
1720 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1721 TCGv_i64 addr, int size, int is_pair)
1723 tcg_gen_mov_i64(cpu_exclusive_test, addr);
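    /* Pack the operand fields (size, is_pair, Rd, Rt, Rt2) into
     * exclusive_info so the user-mode EXCP_STREX handler can reconstruct
     * and perform the store-exclusive.
     */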
1724 tcg_gen_movi_i32(cpu_exclusive_info,
1725 size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1726 gen_exception_internal_insn(s, 4, EXCP_STREX);
1728 #else
1729 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1730 TCGv_i64 inaddr, int size, int is_pair)
1732 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1733 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
1734 * [addr] = {Rt};
1735 * if (is_pair) {
1736 * [addr + datasize] = {Rt2};
1738 * {Rd} = 0;
1739 * } else {
1740 * {Rd} = 1;
1742 * env->exclusive_addr = -1;
1744 TCGLabel *fail_label = gen_new_label();
1745 TCGLabel *done_label = gen_new_label();
1746 TCGv_i64 addr = tcg_temp_local_new_i64();
1747 TCGv_i64 tmp;
1749 /* Copy input into a local temp so it is not trashed when the
1750 * basic block ends at the branch insn.
1752 tcg_gen_mov_i64(addr, inaddr);
1753 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1755 tmp = tcg_temp_new_i64();
1756 tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
1757 tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
1758 tcg_temp_free_i64(tmp);
1760 if (is_pair) {
1761 TCGv_i64 addrhi = tcg_temp_new_i64();
1762 TCGv_i64 tmphi = tcg_temp_new_i64();
1764 tcg_gen_addi_i64(addrhi, addr, 1 << size);
1765 tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
1766 tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
1768 tcg_temp_free_i64(tmphi);
1769 tcg_temp_free_i64(addrhi);
1772 /* We seem to still have the exclusive monitor, so do the store */
1773 tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
1774 if (is_pair) {
1775 TCGv_i64 addrhi = tcg_temp_new_i64();
1777 tcg_gen_addi_i64(addrhi, addr, 1 << size);
1778 tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
1779 get_mem_index(s), MO_TE + size);
1780 tcg_temp_free_i64(addrhi);
1783 tcg_temp_free_i64(addr);
1785 tcg_gen_movi_i64(cpu_reg(s, rd), 0);
1786 tcg_gen_br(done_label);
1787 gen_set_label(fail_label);
1788 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1789 gen_set_label(done_label);
1790 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1793 #endif
1795 /* C3.3.6 Load/store exclusive
1797 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
1798 * +-----+-------------+----+---+----+------+----+-------+------+------+
1799 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
1800 * +-----+-------------+----+---+----+------+----+-------+------+------+
1802 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1803 * L: 0 -> store, 1 -> load
1804 * o2: 0 -> exclusive, 1 -> not
1805 * o1: 0 -> single register, 1 -> register pair
1806 * o0: 1 -> load-acquire/store-release, 0 -> not
1808 * o0 == 0 AND o2 == 1 is un-allocated
1809 * o1 == 1 is un-allocated except for 32 and 64 bit sizes
1811 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1813 int rt = extract32(insn, 0, 5);
1814 int rn = extract32(insn, 5, 5);
1815 int rt2 = extract32(insn, 10, 5);
1816 int is_lasr = extract32(insn, 15, 1);
1817 int rs = extract32(insn, 16, 5);
1818 int is_pair = extract32(insn, 21, 1);
1819 int is_store = !extract32(insn, 22, 1);
1820 int is_excl = !extract32(insn, 23, 1);
1821 int size = extract32(insn, 30, 2);
1822 TCGv_i64 tcg_addr;
1824 if ((!is_excl && !is_lasr) ||
1825 (is_pair && size < 2)) {
1826 unallocated_encoding(s);
1827 return;
1830 if (rn == 31) {
1831 gen_check_sp_alignment(s);
1833 tcg_addr = read_cpu_reg_sp(s, rn, 1);
1835 /* Note that since TCG is single threaded load-acquire/store-release
1836 * semantics require no extra if (is_lasr) { ... } handling.
1839 if (is_excl) {
1840 if (!is_store) {
1841 s->is_ldex = true;
1842 gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1843 } else {
1844 gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1846 } else {
1847 TCGv_i64 tcg_rt = cpu_reg(s, rt);
1848 if (is_store) {
1849 do_gpr_st(s, tcg_rt, tcg_addr, size);
1850 } else {
1851 do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1853 if (is_pair) {
1854             TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
1855 tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1856 if (is_store) {
1857 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1858 } else {
1859 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1866 * C3.3.5 Load register (literal)
1868 * 31 30 29 27 26 25 24 23 5 4 0
1869 * +-----+-------+---+-----+-------------------+-------+
1870 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
1871 * +-----+-------+---+-----+-------------------+-------+
1873 * V: 1 -> vector (simd/fp)
1874 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1875 * 10-> 32 bit signed, 11 -> prefetch
1876 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1878 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1880 int rt = extract32(insn, 0, 5);
1881 int64_t imm = sextract32(insn, 5, 19) << 2;
1882 bool is_vector = extract32(insn, 26, 1);
1883 int opc = extract32(insn, 30, 2);
1884 bool is_signed = false;
1885 int size = 2;
1886 TCGv_i64 tcg_rt, tcg_addr;
1888 if (is_vector) {
1889 if (opc == 3) {
1890 unallocated_encoding(s);
1891 return;
1893 size = 2 + opc;
1894 if (!fp_access_check(s)) {
1895 return;
1897 } else {
1898 if (opc == 3) {
1899 /* PRFM (literal) : prefetch */
1900 return;
1902 size = 2 + extract32(opc, 0, 1);
1903 is_signed = extract32(opc, 1, 1);
1906 tcg_rt = cpu_reg(s, rt);
1908 tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1909 if (is_vector) {
1910 do_fp_ld(s, rt, tcg_addr, size);
1911 } else {
1912 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1914 tcg_temp_free_i64(tcg_addr);
1918 * C5.6.80 LDNP (Load Pair - non-temporal hint)
1919 * C5.6.81 LDP (Load Pair - non vector)
1920 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1921 * C5.6.176 STNP (Store Pair - non-temporal hint)
1922 * C5.6.177 STP (Store Pair - non vector)
1923 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1924 * C6.3.165 LDP (Load Pair of SIMD&FP)
1925 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1926 * C6.3.284 STP (Store Pair of SIMD&FP)
1928 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
1929 * +-----+-------+---+---+-------+---+-----------------------------+
1930 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
1931 * +-----+-------+---+---+-------+---+-------+-------+------+------+
1933 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
1934 * LDPSW 01
1935 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1936 * V: 0 -> GPR, 1 -> Vector
1937 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1938 * 10 -> signed offset, 11 -> pre-index
1939 * L: 0 -> Store 1 -> Load
1941 * Rt, Rt2 = GPR or SIMD registers to be stored
1942 * Rn = general purpose register containing address
1943 * imm7 = signed offset (multiple of 4 or 8 depending on size)
1945 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1947 int rt = extract32(insn, 0, 5);
1948 int rn = extract32(insn, 5, 5);
1949 int rt2 = extract32(insn, 10, 5);
1950 uint64_t offset = sextract64(insn, 15, 7);
1951 int index = extract32(insn, 23, 2);
1952 bool is_vector = extract32(insn, 26, 1);
1953 bool is_load = extract32(insn, 22, 1);
1954 int opc = extract32(insn, 30, 2);
1956 bool is_signed = false;
1957 bool postindex = false;
1958 bool wback = false;
1960 TCGv_i64 tcg_addr; /* calculated address */
1961 int size;
1963 if (opc == 3) {
1964 unallocated_encoding(s);
1965 return;
1968 if (is_vector) {
1969 size = 2 + opc;
1970 } else {
1971 size = 2 + extract32(opc, 1, 1);
1972 is_signed = extract32(opc, 0, 1);
1973 if (!is_load && is_signed) {
1974 unallocated_encoding(s);
1975 return;
1979 switch (index) {
1980 case 1: /* post-index */
1981 postindex = true;
1982 wback = true;
1983 break;
1984 case 0:
1985 /* signed offset with "non-temporal" hint. Since we don't emulate
1986 * caches we don't care about hints to the cache system about
1987 * data access patterns, and handle this identically to plain
1988 * signed offset.
1990 if (is_signed) {
1991 /* There is no non-temporal-hint version of LDPSW */
1992 unallocated_encoding(s);
1993 return;
1995 postindex = false;
1996 break;
1997 case 2: /* signed offset, rn not updated */
1998 postindex = false;
1999 break;
2000 case 3: /* pre-index */
2001 postindex = false;
2002 wback = true;
2003 break;
2006 if (is_vector && !fp_access_check(s)) {
2007 return;
2010 offset <<= size;
2012 if (rn == 31) {
2013 gen_check_sp_alignment(s);
2016 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2018 if (!postindex) {
2019 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2022 if (is_vector) {
2023 if (is_load) {
2024 do_fp_ld(s, rt, tcg_addr, size);
2025 } else {
2026 do_fp_st(s, rt, tcg_addr, size);
2028 } else {
2029 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2030 if (is_load) {
2031 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2032 } else {
2033 do_gpr_st(s, tcg_rt, tcg_addr, size);
2036 tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2037 if (is_vector) {
2038 if (is_load) {
2039 do_fp_ld(s, rt2, tcg_addr, size);
2040 } else {
2041 do_fp_st(s, rt2, tcg_addr, size);
2043 } else {
2044 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2045 if (is_load) {
2046 do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2047 } else {
2048 do_gpr_st(s, tcg_rt2, tcg_addr, size);
2052 if (wback) {
2053 if (postindex) {
2054 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2055 } else {
2056 tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2058 tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2063 * C3.3.8 Load/store (immediate post-indexed)
2064 * C3.3.9 Load/store (immediate pre-indexed)
2065 * C3.3.12 Load/store (unscaled immediate)
2067 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2068 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2069 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2070 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2072 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2073  *       10 -> unprivileged
2074 * V = 0 -> non-vector
2075 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2076 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
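/* Illustrative example: post-indexed LDR X0, [X1], #8 has idx = 01, so
 * post_index = writeback = true below: the load uses the unmodified base
 * address and X1 is then incremented by imm9 = 8. Pre-indexed (idx = 11)
 * adds imm9 before the access and also writes the new address back.
 */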
2078 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2080 int rt = extract32(insn, 0, 5);
2081 int rn = extract32(insn, 5, 5);
2082 int imm9 = sextract32(insn, 12, 9);
2083 int opc = extract32(insn, 22, 2);
2084 int size = extract32(insn, 30, 2);
2085 int idx = extract32(insn, 10, 2);
2086 bool is_signed = false;
2087 bool is_store = false;
2088 bool is_extended = false;
2089 bool is_unpriv = (idx == 2);
2090 bool is_vector = extract32(insn, 26, 1);
2091 bool post_index;
2092 bool writeback;
2094 TCGv_i64 tcg_addr;
2096 if (is_vector) {
2097 size |= (opc & 2) << 1;
2098 if (size > 4 || is_unpriv) {
2099 unallocated_encoding(s);
2100 return;
2102 is_store = ((opc & 1) == 0);
2103 if (!fp_access_check(s)) {
2104 return;
2106 } else {
2107 if (size == 3 && opc == 2) {
2108 /* PRFM - prefetch */
2109 if (is_unpriv) {
2110 unallocated_encoding(s);
2111 return;
2113 return;
2115 if (opc == 3 && size > 1) {
2116 unallocated_encoding(s);
2117 return;
2119 is_store = (opc == 0);
2120 is_signed = opc & (1<<1);
2121 is_extended = (size < 3) && (opc & 1);
2124 switch (idx) {
2125 case 0:
2126 case 2:
2127 post_index = false;
2128 writeback = false;
2129 break;
2130 case 1:
2131 post_index = true;
2132 writeback = true;
2133 break;
2134 case 3:
2135 post_index = false;
2136 writeback = true;
2137 break;
2140 if (rn == 31) {
2141 gen_check_sp_alignment(s);
2143 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2145 if (!post_index) {
2146 tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2149 if (is_vector) {
2150 if (is_store) {
2151 do_fp_st(s, rt, tcg_addr, size);
2152 } else {
2153 do_fp_ld(s, rt, tcg_addr, size);
2155 } else {
2156 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2157 int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2159 if (is_store) {
2160 do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2161 } else {
2162 do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2163 is_signed, is_extended, memidx);
2167 if (writeback) {
2168 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2169 if (post_index) {
2170 tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2172 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2177 * C3.3.10 Load/store (register offset)
2179 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2180 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2181 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
2182 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2184 * For non-vector:
2185 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2186 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2187 * For vector:
2188 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2189 * opc<0>: 0 -> store, 1 -> load
2190 * V: 1 -> vector/simd
2191 * opt: extend encoding (see DecodeRegExtend)
2192 * S: if S=1 then scale (essentially index by sizeof(size))
2193 * Rt: register to transfer into/out of
2194 * Rn: address register or SP for base
2195 * Rm: offset register or ZR for offset
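/* Illustrative example: LDR X0, [X1, W2, UXTW #3] zero-extends W2 to 64
 * bits and shifts it left by 3 (S = 1 scales by the access size, 8 bytes
 * here) before adding it to the base; that is what ext_and_shift_reg()
 * below does with opt and "shift ? size : 0".
 */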
2197 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2199 int rt = extract32(insn, 0, 5);
2200 int rn = extract32(insn, 5, 5);
2201 int shift = extract32(insn, 12, 1);
2202 int rm = extract32(insn, 16, 5);
2203 int opc = extract32(insn, 22, 2);
2204 int opt = extract32(insn, 13, 3);
2205 int size = extract32(insn, 30, 2);
2206 bool is_signed = false;
2207 bool is_store = false;
2208 bool is_extended = false;
2209 bool is_vector = extract32(insn, 26, 1);
2211 TCGv_i64 tcg_rm;
2212 TCGv_i64 tcg_addr;
2214 if (extract32(opt, 1, 1) == 0) {
2215 unallocated_encoding(s);
2216 return;
2219 if (is_vector) {
2220 size |= (opc & 2) << 1;
2221 if (size > 4) {
2222 unallocated_encoding(s);
2223 return;
2225 is_store = !extract32(opc, 0, 1);
2226 if (!fp_access_check(s)) {
2227 return;
2229 } else {
2230 if (size == 3 && opc == 2) {
2231 /* PRFM - prefetch */
2232 return;
2234 if (opc == 3 && size > 1) {
2235 unallocated_encoding(s);
2236 return;
2238 is_store = (opc == 0);
2239 is_signed = extract32(opc, 1, 1);
2240 is_extended = (size < 3) && extract32(opc, 0, 1);
2243 if (rn == 31) {
2244 gen_check_sp_alignment(s);
2246 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2248 tcg_rm = read_cpu_reg(s, rm, 1);
2249 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2251 tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2253 if (is_vector) {
2254 if (is_store) {
2255 do_fp_st(s, rt, tcg_addr, size);
2256 } else {
2257 do_fp_ld(s, rt, tcg_addr, size);
2259 } else {
2260 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2261 if (is_store) {
2262 do_gpr_st(s, tcg_rt, tcg_addr, size);
2263 } else {
2264 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2270 * C3.3.13 Load/store (unsigned immediate)
2272 * 31 30 29 27 26 25 24 23 22 21 10 9 5
2273 * +----+-------+---+-----+-----+------------+-------+------+
2274 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
2275 * +----+-------+---+-----+-----+------------+-------+------+
2277 * For non-vector:
2278 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2279 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2280 * For vector:
2281 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2282 * opc<0>: 0 -> store, 1 -> load
2283 * Rn: base address register (inc SP)
2284 * Rt: target register
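/* Illustrative example: the unsigned immediate is scaled by the access
 * size, so LDR X0, [X1, #8] (size = 3) is encoded with imm12 = 1 and the
 * byte offset below becomes imm12 << size = 8.
 */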
2286 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2288 int rt = extract32(insn, 0, 5);
2289 int rn = extract32(insn, 5, 5);
2290 unsigned int imm12 = extract32(insn, 10, 12);
2291 bool is_vector = extract32(insn, 26, 1);
2292 int size = extract32(insn, 30, 2);
2293 int opc = extract32(insn, 22, 2);
2294 unsigned int offset;
2296 TCGv_i64 tcg_addr;
2298 bool is_store;
2299 bool is_signed = false;
2300 bool is_extended = false;
2302 if (is_vector) {
2303 size |= (opc & 2) << 1;
2304 if (size > 4) {
2305 unallocated_encoding(s);
2306 return;
2308 is_store = !extract32(opc, 0, 1);
2309 if (!fp_access_check(s)) {
2310 return;
2312 } else {
2313 if (size == 3 && opc == 2) {
2314 /* PRFM - prefetch */
2315 return;
2317 if (opc == 3 && size > 1) {
2318 unallocated_encoding(s);
2319 return;
2321 is_store = (opc == 0);
2322 is_signed = extract32(opc, 1, 1);
2323 is_extended = (size < 3) && extract32(opc, 0, 1);
2326 if (rn == 31) {
2327 gen_check_sp_alignment(s);
2329 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2330 offset = imm12 << size;
2331 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2333 if (is_vector) {
2334 if (is_store) {
2335 do_fp_st(s, rt, tcg_addr, size);
2336 } else {
2337 do_fp_ld(s, rt, tcg_addr, size);
2339 } else {
2340 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2341 if (is_store) {
2342 do_gpr_st(s, tcg_rt, tcg_addr, size);
2343 } else {
2344 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2349 /* Load/store register (all forms) */
2350 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2352 switch (extract32(insn, 24, 2)) {
2353 case 0:
2354 if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2355 disas_ldst_reg_roffset(s, insn);
2356 } else {
2357 /* Load/store register (unscaled immediate)
2358 * Load/store immediate pre/post-indexed
2359 * Load/store register unprivileged
2361 disas_ldst_reg_imm9(s, insn);
2363 break;
2364 case 1:
2365 disas_ldst_reg_unsigned_imm(s, insn);
2366 break;
2367 default:
2368 unallocated_encoding(s);
2369 break;
2373 /* C3.3.1 AdvSIMD load/store multiple structures
2375 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
2376 * +---+---+---------------+---+-------------+--------+------+------+------+
2377 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
2378 * +---+---+---------------+---+-------------+--------+------+------+------+
2380 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2382 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
2383 * +---+---+---------------+---+---+---------+--------+------+------+------+
2384 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
2385 * +---+---+---------------+---+---+---------+--------+------+------+------+
2387 * Rt: first (or only) SIMD&FP register to be transferred
2388 * Rn: base address or SP
2389 * Rm (post-index only): post-index register (when !31) or size dependent #imm
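/* Illustrative example: LD4 {V0.4S-V3.4S}, [X0] has opcode 0, giving
 * rpt = 1 and selem = 4 in the decode below; with Q = 1 and size = 2 that
 * is 1 * 4 elements * 4 structure members * 4 bytes = 64 bytes transferred,
 * with V0-V3 each collecting one member of every structure.
 */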
2391 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2393 int rt = extract32(insn, 0, 5);
2394 int rn = extract32(insn, 5, 5);
2395 int size = extract32(insn, 10, 2);
2396 int opcode = extract32(insn, 12, 4);
2397 bool is_store = !extract32(insn, 22, 1);
2398 bool is_postidx = extract32(insn, 23, 1);
2399 bool is_q = extract32(insn, 30, 1);
2400 TCGv_i64 tcg_addr, tcg_rn;
2402 int ebytes = 1 << size;
2403 int elements = (is_q ? 128 : 64) / (8 << size);
2404 int rpt; /* num iterations */
2405 int selem; /* structure elements */
2406 int r;
2408 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2409 unallocated_encoding(s);
2410 return;
2413 /* From the shared decode logic */
2414 switch (opcode) {
2415 case 0x0:
2416 rpt = 1;
2417 selem = 4;
2418 break;
2419 case 0x2:
2420 rpt = 4;
2421 selem = 1;
2422 break;
2423 case 0x4:
2424 rpt = 1;
2425 selem = 3;
2426 break;
2427 case 0x6:
2428 rpt = 3;
2429 selem = 1;
2430 break;
2431 case 0x7:
2432 rpt = 1;
2433 selem = 1;
2434 break;
2435 case 0x8:
2436 rpt = 1;
2437 selem = 2;
2438 break;
2439 case 0xa:
2440 rpt = 2;
2441 selem = 1;
2442 break;
2443 default:
2444 unallocated_encoding(s);
2445 return;
2448 if (size == 3 && !is_q && selem != 1) {
2449 /* reserved */
2450 unallocated_encoding(s);
2451 return;
2454 if (!fp_access_check(s)) {
2455 return;
2458 if (rn == 31) {
2459 gen_check_sp_alignment(s);
2462 tcg_rn = cpu_reg_sp(s, rn);
2463 tcg_addr = tcg_temp_new_i64();
2464 tcg_gen_mov_i64(tcg_addr, tcg_rn);
2466 for (r = 0; r < rpt; r++) {
2467 int e;
2468 for (e = 0; e < elements; e++) {
2469 int tt = (rt + r) % 32;
2470 int xs;
2471 for (xs = 0; xs < selem; xs++) {
2472 if (is_store) {
2473 do_vec_st(s, tt, e, tcg_addr, size);
2474 } else {
2475 do_vec_ld(s, tt, e, tcg_addr, size);
2477 /* For non-quad operations, setting a slice of the low
2478 * 64 bits of the register clears the high 64 bits (in
2479 * the ARM ARM pseudocode this is implicit in the fact
2480 * that 'rval' is a 64 bit wide variable). We optimize
2481 * by noticing that we only need to do this the first
2482 * time we touch a register.
2484 if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2485 clear_vec_high(s, tt);
2488 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2489 tt = (tt + 1) % 32;
2494 if (is_postidx) {
2495 int rm = extract32(insn, 16, 5);
2496 if (rm == 31) {
2497 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2498 } else {
2499 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2502 tcg_temp_free_i64(tcg_addr);
2505 /* C3.3.3 AdvSIMD load/store single structure
2507 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2508 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2509 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
2510 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2512 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2514 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2515 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2516 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
2517 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2519 * Rt: first (or only) SIMD&FP register to be transferred
2520 * Rn: base address or SP
2521 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2522 * index = encoded in Q:S:size dependent on size
2524 * lane_size = encoded in R, opc
2525 * transfer width = encoded in opc, S, size
2527 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2529 int rt = extract32(insn, 0, 5);
2530 int rn = extract32(insn, 5, 5);
2531 int size = extract32(insn, 10, 2);
2532 int S = extract32(insn, 12, 1);
2533 int opc = extract32(insn, 13, 3);
2534 int R = extract32(insn, 21, 1);
2535 int is_load = extract32(insn, 22, 1);
2536 int is_postidx = extract32(insn, 23, 1);
2537 int is_q = extract32(insn, 30, 1);
2539 int scale = extract32(opc, 1, 2);
2540 int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2541 bool replicate = false;
2542 int index = is_q << 3 | S << 2 | size;
2543 int ebytes, xs;
2544 TCGv_i64 tcg_addr, tcg_rn;
2546 switch (scale) {
2547 case 3:
2548 if (!is_load || S) {
2549 unallocated_encoding(s);
2550 return;
2552 scale = size;
2553 replicate = true;
2554 break;
2555 case 0:
2556 break;
2557 case 1:
2558 if (extract32(size, 0, 1)) {
2559 unallocated_encoding(s);
2560 return;
2562 index >>= 1;
2563 break;
2564 case 2:
2565 if (extract32(size, 1, 1)) {
2566 unallocated_encoding(s);
2567 return;
2569 if (!extract32(size, 0, 1)) {
2570 index >>= 2;
2571 } else {
2572 if (S) {
2573 unallocated_encoding(s);
2574 return;
2576 index >>= 3;
2577 scale = 3;
2579 break;
2580 default:
2581 g_assert_not_reached();
2584 if (!fp_access_check(s)) {
2585 return;
2588 ebytes = 1 << scale;
2590 if (rn == 31) {
2591 gen_check_sp_alignment(s);
2594 tcg_rn = cpu_reg_sp(s, rn);
2595 tcg_addr = tcg_temp_new_i64();
2596 tcg_gen_mov_i64(tcg_addr, tcg_rn);
2598 for (xs = 0; xs < selem; xs++) {
2599 if (replicate) {
2600 /* Load and replicate to all elements */
2601 uint64_t mulconst;
2602 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2604 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2605 get_mem_index(s), MO_TE + scale);
2606 switch (scale) {
2607 case 0:
2608 mulconst = 0x0101010101010101ULL;
2609 break;
2610 case 1:
2611 mulconst = 0x0001000100010001ULL;
2612 break;
2613 case 2:
2614 mulconst = 0x0000000100000001ULL;
2615 break;
2616 case 3:
2617 mulconst = 0;
2618 break;
2619 default:
2620 g_assert_not_reached();
2622 if (mulconst) {
2623 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2625 write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2626 if (is_q) {
2627 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2628 } else {
2629 clear_vec_high(s, rt);
2631 tcg_temp_free_i64(tcg_tmp);
2632 } else {
2633 /* Load/store one element per register */
2634 if (is_load) {
2635 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2636 } else {
2637 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2640 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2641 rt = (rt + 1) % 32;
2644 if (is_postidx) {
2645 int rm = extract32(insn, 16, 5);
2646 if (rm == 31) {
2647 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2648 } else {
2649 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2652 tcg_temp_free_i64(tcg_addr);
2655 /* C3.3 Loads and stores */
2656 static void disas_ldst(DisasContext *s, uint32_t insn)
2658 switch (extract32(insn, 24, 6)) {
2659 case 0x08: /* Load/store exclusive */
2660 disas_ldst_excl(s, insn);
2661 break;
2662 case 0x18: case 0x1c: /* Load register (literal) */
2663 disas_ld_lit(s, insn);
2664 break;
2665 case 0x28: case 0x29:
2666 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2667 disas_ldst_pair(s, insn);
2668 break;
2669 case 0x38: case 0x39:
2670 case 0x3c: case 0x3d: /* Load/store register (all forms) */
2671 disas_ldst_reg(s, insn);
2672 break;
2673 case 0x0c: /* AdvSIMD load/store multiple structures */
2674 disas_ldst_multiple_struct(s, insn);
2675 break;
2676 case 0x0d: /* AdvSIMD load/store single structure */
2677 disas_ldst_single_struct(s, insn);
2678 break;
2679 default:
2680 unallocated_encoding(s);
2681 break;
2685 /* C3.4.6 PC-rel. addressing
2686 * 31 30 29 28 24 23 5 4 0
2687 * +----+-------+-----------+-------------------+------+
2688 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
2689 * +----+-------+-----------+-------------------+------+
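/* Illustrative example: ADR Xd, <label> (op = 0) sets Xd to
 * PC + SignExtend(immhi:immlo); ADRP (op = 1) instead uses PC with its low
 * 12 bits cleared and the immediate shifted left by 12, so an ADRP with
 * immhi:immlo = 1 yields (PC & ~0xfff) + 0x1000, matching the page-based
 * handling below.
 */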
2691 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2693 unsigned int page, rd;
2694 uint64_t base;
2695 uint64_t offset;
2697 page = extract32(insn, 31, 1);
2698 /* SignExtend(immhi:immlo) -> offset */
2699 offset = sextract64(insn, 5, 19);
2700 offset = offset << 2 | extract32(insn, 29, 2);
2701 rd = extract32(insn, 0, 5);
2702 base = s->pc - 4;
2704 if (page) {
2705 /* ADRP (page based) */
2706 base &= ~0xfff;
2707 offset <<= 12;
2710 tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2714 * C3.4.1 Add/subtract (immediate)
2716 * 31 30 29 28 24 23 22 21 10 9 5 4 0
2717 * +--+--+--+-----------+-----+-------------+-----+-----+
2718 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
2719 * +--+--+--+-----------+-----+-------------+-----+-----+
2721 * sf: 0 -> 32bit, 1 -> 64bit
2722 * op: 0 -> add , 1 -> sub
2723 * S: 1 -> set flags
2724 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
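/* Illustrative example: ADD X0, X1, #1, LSL #12 encodes imm12 = 1 with
 * shift = 01, so the effective immediate below becomes 0x1000; any other
 * shift value is an unallocated encoding.
 */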
2726 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2728 int rd = extract32(insn, 0, 5);
2729 int rn = extract32(insn, 5, 5);
2730 uint64_t imm = extract32(insn, 10, 12);
2731 int shift = extract32(insn, 22, 2);
2732 bool setflags = extract32(insn, 29, 1);
2733 bool sub_op = extract32(insn, 30, 1);
2734 bool is_64bit = extract32(insn, 31, 1);
2736 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2737 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2738 TCGv_i64 tcg_result;
2740 switch (shift) {
2741 case 0x0:
2742 break;
2743 case 0x1:
2744 imm <<= 12;
2745 break;
2746 default:
2747 unallocated_encoding(s);
2748 return;
2751 tcg_result = tcg_temp_new_i64();
2752 if (!setflags) {
2753 if (sub_op) {
2754 tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2755 } else {
2756 tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2758 } else {
2759 TCGv_i64 tcg_imm = tcg_const_i64(imm);
2760 if (sub_op) {
2761 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2762 } else {
2763 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2765 tcg_temp_free_i64(tcg_imm);
2768 if (is_64bit) {
2769 tcg_gen_mov_i64(tcg_rd, tcg_result);
2770 } else {
2771 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2774 tcg_temp_free_i64(tcg_result);
2777 /* The input should be a value in the bottom e bits (with higher
2778 * bits zero); returns that value replicated into every element
2779 * of size e in a 64 bit integer.
2781 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
2783 assert(e != 0);
2784 while (e < 64) {
2785 mask |= mask << e;
2786 e *= 2;
2788 return mask;
2791 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
2792 static inline uint64_t bitmask64(unsigned int length)
2794 assert(length > 0 && length <= 64);
2795 return ~0ULL >> (64 - length);
2798 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
2799 * only require the wmask. Returns false if the imms/immr/immn are a reserved
2800 * value (ie should cause a guest UNDEF exception), and true if they are
2801 * valid, in which case the decoded bit pattern is written to result.
2803 static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
2804 unsigned int imms, unsigned int immr)
2806 uint64_t mask;
2807 unsigned e, levels, s, r;
2808 int len;
2810 assert(immn < 2 && imms < 64 && immr < 64);
2812 /* The bit patterns we create here are 64 bit patterns which
2813 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
2814 * 64 bits each. Each element contains the same value: a run
2815 * of between 1 and e-1 non-zero bits, rotated within the
2816 * element by between 0 and e-1 bits.
2818 * The element size and run length are encoded into immn (1 bit)
2819 * and imms (6 bits) as follows:
2820 * 64 bit elements: immn = 1, imms = <length of run - 1>
2821 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
2822 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
2823 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
2824 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
2825 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
2826 * Notice that immn = 0, imms = 11111x is the only combination
2827 * not covered by one of the above options; this is reserved.
2828 * Further, <length of run - 1> all-ones is a reserved pattern.
2830 * In all cases the rotation is by immr % e (and immr is 6 bits).
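/* Worked example (illustrative): immn = 0, imms = 0b110010, immr = 0
 * selects 8-bit elements (imms = 110 : <run length 3 - 1>), so the element
 * value is 0b00000111 and this function returns 0x0707070707070707.
 * A non-zero immr would rotate that 3-bit run within each byte before
 * replication.
 */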
2833 /* First determine the element size */
2834 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
2835 if (len < 1) {
2836         /* This is the immn == 0, imms == 11111x case */
2837 return false;
2839 e = 1 << len;
2841 levels = e - 1;
2842 s = imms & levels;
2843 r = immr & levels;
2845 if (s == levels) {
2846 /* <length of run - 1> mustn't be all-ones. */
2847 return false;
2850 /* Create the value of one element: s+1 set bits rotated
2851 * by r within the element (which is e bits wide)...
2853 mask = bitmask64(s + 1);
2854 if (r) {
2855 mask = (mask >> r) | (mask << (e - r));
2856 mask &= bitmask64(e);
2858 /* ...then replicate the element over the whole 64 bit value */
2859 mask = bitfield_replicate(mask, e);
2860 *result = mask;
2861 return true;
2864 /* C3.4.4 Logical (immediate)
2865 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2866 * +----+-----+-------------+---+------+------+------+------+
2867 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
2868 * +----+-----+-------------+---+------+------+------+------+
2870 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2872 unsigned int sf, opc, is_n, immr, imms, rn, rd;
2873 TCGv_i64 tcg_rd, tcg_rn;
2874 uint64_t wmask;
2875 bool is_and = false;
2877 sf = extract32(insn, 31, 1);
2878 opc = extract32(insn, 29, 2);
2879 is_n = extract32(insn, 22, 1);
2880 immr = extract32(insn, 16, 6);
2881 imms = extract32(insn, 10, 6);
2882 rn = extract32(insn, 5, 5);
2883 rd = extract32(insn, 0, 5);
2885 if (!sf && is_n) {
2886 unallocated_encoding(s);
2887 return;
2890 if (opc == 0x3) { /* ANDS */
2891 tcg_rd = cpu_reg(s, rd);
2892 } else {
2893 tcg_rd = cpu_reg_sp(s, rd);
2895 tcg_rn = cpu_reg(s, rn);
2897 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2898 /* some immediate field values are reserved */
2899 unallocated_encoding(s);
2900 return;
2903 if (!sf) {
2904 wmask &= 0xffffffff;
2907 switch (opc) {
2908 case 0x3: /* ANDS */
2909 case 0x0: /* AND */
2910 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2911 is_and = true;
2912 break;
2913 case 0x1: /* ORR */
2914 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2915 break;
2916 case 0x2: /* EOR */
2917 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2918 break;
2919 default:
2920 assert(FALSE); /* must handle all above */
2921 break;
2924 if (!sf && !is_and) {
2925 /* zero extend final result; we know we can skip this for AND
2926 * since the immediate had the high 32 bits clear.
2928 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2931 if (opc == 3) { /* ANDS */
2932 gen_logic_CC(sf, tcg_rd);
2937 * C3.4.5 Move wide (immediate)
2939 * 31 30 29 28 23 22 21 20 5 4 0
2940 * +--+-----+-------------+-----+----------------+------+
2941 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
2942 * +--+-----+-------------+-----+----------------+------+
2944 * sf: 0 -> 32 bit, 1 -> 64 bit
2945 * opc: 00 -> N, 10 -> Z, 11 -> K
2946  * hw: shift / 16 (shift of 0 or 16; 32 and 48 only valid when sf is 1)
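/* Illustrative examples: MOVZ X0, #0x1234, LSL #16 sets X0 to 0x12340000;
 * MOVN produces the bitwise inverse of the shifted immediate; MOVK only
 * replaces the 16-bit field at the given position (the deposit below) and
 * leaves the rest of the register unchanged.
 */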
2948 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2950 int rd = extract32(insn, 0, 5);
2951 uint64_t imm = extract32(insn, 5, 16);
2952 int sf = extract32(insn, 31, 1);
2953 int opc = extract32(insn, 29, 2);
2954 int pos = extract32(insn, 21, 2) << 4;
2955 TCGv_i64 tcg_rd = cpu_reg(s, rd);
2956 TCGv_i64 tcg_imm;
2958 if (!sf && (pos >= 32)) {
2959 unallocated_encoding(s);
2960 return;
2963 switch (opc) {
2964 case 0: /* MOVN */
2965 case 2: /* MOVZ */
2966 imm <<= pos;
2967 if (opc == 0) {
2968 imm = ~imm;
2970 if (!sf) {
2971 imm &= 0xffffffffu;
2973 tcg_gen_movi_i64(tcg_rd, imm);
2974 break;
2975 case 3: /* MOVK */
2976 tcg_imm = tcg_const_i64(imm);
2977 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2978 tcg_temp_free_i64(tcg_imm);
2979 if (!sf) {
2980 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2982 break;
2983 default:
2984 unallocated_encoding(s);
2985 break;
2989 /* C3.4.2 Bitfield
2990 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2991 * +----+-----+-------------+---+------+------+------+------+
2992 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
2993 * +----+-----+-------------+---+------+------+------+------+
2995 static void disas_bitfield(DisasContext *s, uint32_t insn)
2997 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2998 TCGv_i64 tcg_rd, tcg_tmp;
3000 sf = extract32(insn, 31, 1);
3001 opc = extract32(insn, 29, 2);
3002 n = extract32(insn, 22, 1);
3003 ri = extract32(insn, 16, 6);
3004 si = extract32(insn, 10, 6);
3005 rn = extract32(insn, 5, 5);
3006 rd = extract32(insn, 0, 5);
3007 bitsize = sf ? 64 : 32;
3009 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3010 unallocated_encoding(s);
3011 return;
3014 tcg_rd = cpu_reg(s, rd);
3015 tcg_tmp = read_cpu_reg(s, rn, sf);
3017 /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
3019 if (opc != 1) { /* SBFM or UBFM */
3020 tcg_gen_movi_i64(tcg_rd, 0);
3023 /* do the bit move operation */
3024 if (si >= ri) {
3025 /* Wd<s-r:0> = Wn<s:r> */
3026 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3027 pos = 0;
3028 len = (si - ri) + 1;
3029 } else {
3030 /* Wd<32+s-r,32-r> = Wn<s:0> */
3031 pos = bitsize - ri;
3032 len = si + 1;
3035 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3037 if (opc == 0) { /* SBFM - sign extend the destination field */
3038 tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3039 tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3042 if (!sf) { /* zero extend final result */
3043 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
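/* Worked examples for the bitfield decode above (illustrative):
 * UBFX W0, W1, #8, #4 is UBFM with immr = 8, imms = 11, taking the
 * "si >= ri" branch with pos = 0, len = 4, i.e. W0 = (W1 >> 8) & 0xf.
 * LSL W0, W1, #4 is UBFM with immr = 28, imms = 27, taking the other
 * branch with pos = 4, len = 28.
 */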
3047 /* C3.4.3 Extract
3048 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
3049 * +----+------+-------------+---+----+------+--------+------+------+
3050 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
3051 * +----+------+-------------+---+----+------+--------+------+------+
3053 static void disas_extract(DisasContext *s, uint32_t insn)
3055 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3057 sf = extract32(insn, 31, 1);
3058 n = extract32(insn, 22, 1);
3059 rm = extract32(insn, 16, 5);
3060 imm = extract32(insn, 10, 6);
3061 rn = extract32(insn, 5, 5);
3062 rd = extract32(insn, 0, 5);
3063 op21 = extract32(insn, 29, 2);
3064 op0 = extract32(insn, 21, 1);
3065 bitsize = sf ? 64 : 32;
3067 if (sf != n || op21 || op0 || imm >= bitsize) {
3068 unallocated_encoding(s);
3069 } else {
3070 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3072 tcg_rd = cpu_reg(s, rd);
3074 if (imm) {
3075 /* OPTME: we can special case rm==rn as a rotate */
3076 tcg_rm = read_cpu_reg(s, rm, sf);
3077 tcg_rn = read_cpu_reg(s, rn, sf);
3078 tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3079 tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3080 tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3081 if (!sf) {
3082 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3084 } else {
3085 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3086 * so an extract from bit 0 is a special case.
3088 if (sf) {
3089 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3090 } else {
3091 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3098 /* C3.4 Data processing - immediate */
3099 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3101 switch (extract32(insn, 23, 6)) {
3102 case 0x20: case 0x21: /* PC-rel. addressing */
3103 disas_pc_rel_adr(s, insn);
3104 break;
3105 case 0x22: case 0x23: /* Add/subtract (immediate) */
3106 disas_add_sub_imm(s, insn);
3107 break;
3108 case 0x24: /* Logical (immediate) */
3109 disas_logic_imm(s, insn);
3110 break;
3111 case 0x25: /* Move wide (immediate) */
3112 disas_movw_imm(s, insn);
3113 break;
3114 case 0x26: /* Bitfield */
3115 disas_bitfield(s, insn);
3116 break;
3117 case 0x27: /* Extract */
3118 disas_extract(s, insn);
3119 break;
3120 default:
3121 unallocated_encoding(s);
3122 break;
3126 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3127 * Note that it is the caller's responsibility to ensure that the
3128  * shift amount is in range (ie 0..31 or 0..63) and to provide the ARM
3129  * mandated semantics for out-of-range shifts.
3131 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3132 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3134 switch (shift_type) {
3135 case A64_SHIFT_TYPE_LSL:
3136 tcg_gen_shl_i64(dst, src, shift_amount);
3137 break;
3138 case A64_SHIFT_TYPE_LSR:
3139 tcg_gen_shr_i64(dst, src, shift_amount);
3140 break;
3141 case A64_SHIFT_TYPE_ASR:
3142 if (!sf) {
3143 tcg_gen_ext32s_i64(dst, src);
3145 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3146 break;
3147 case A64_SHIFT_TYPE_ROR:
3148 if (sf) {
3149 tcg_gen_rotr_i64(dst, src, shift_amount);
3150 } else {
3151 TCGv_i32 t0, t1;
3152 t0 = tcg_temp_new_i32();
3153 t1 = tcg_temp_new_i32();
3154 tcg_gen_extrl_i64_i32(t0, src);
3155 tcg_gen_extrl_i64_i32(t1, shift_amount);
3156 tcg_gen_rotr_i32(t0, t0, t1);
3157 tcg_gen_extu_i32_i64(dst, t0);
3158 tcg_temp_free_i32(t0);
3159 tcg_temp_free_i32(t1);
3161 break;
3162 default:
3163 assert(FALSE); /* all shift types should be handled */
3164 break;
3167 if (!sf) { /* zero extend final result */
3168 tcg_gen_ext32u_i64(dst, dst);
3172 /* Shift a TCGv src by immediate, put result in dst.
3173 * The shift amount must be in range (this should always be true as the
3174 * relevant instructions will UNDEF on bad shift immediates).
3176 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3177 enum a64_shift_type shift_type, unsigned int shift_i)
3179 assert(shift_i < (sf ? 64 : 32));
3181 if (shift_i == 0) {
3182 tcg_gen_mov_i64(dst, src);
3183 } else {
3184 TCGv_i64 shift_const;
3186 shift_const = tcg_const_i64(shift_i);
3187 shift_reg(dst, src, sf, shift_type, shift_const);
3188 tcg_temp_free_i64(shift_const);
3192 /* C3.5.10 Logical (shifted register)
3193 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3194 * +----+-----+-----------+-------+---+------+--------+------+------+
3195 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
3196 * +----+-----+-----------+-------+---+------+--------+------+------+
3198 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3200 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3201 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3203 sf = extract32(insn, 31, 1);
3204 opc = extract32(insn, 29, 2);
3205 shift_type = extract32(insn, 22, 2);
3206 invert = extract32(insn, 21, 1);
3207 rm = extract32(insn, 16, 5);
3208 shift_amount = extract32(insn, 10, 6);
3209 rn = extract32(insn, 5, 5);
3210 rd = extract32(insn, 0, 5);
3212 if (!sf && (shift_amount & (1 << 5))) {
3213 unallocated_encoding(s);
3214 return;
3217 tcg_rd = cpu_reg(s, rd);
3219 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3220 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3221 * register-register MOV and MVN, so it is worth special casing.
3223 tcg_rm = cpu_reg(s, rm);
3224 if (invert) {
3225 tcg_gen_not_i64(tcg_rd, tcg_rm);
3226 if (!sf) {
3227 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3229 } else {
3230 if (sf) {
3231 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3232 } else {
3233 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3236 return;
3239 tcg_rm = read_cpu_reg(s, rm, sf);
3241 if (shift_amount) {
3242 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3245 tcg_rn = cpu_reg(s, rn);
3247 switch (opc | (invert << 2)) {
3248 case 0: /* AND */
3249 case 3: /* ANDS */
3250 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3251 break;
3252 case 1: /* ORR */
3253 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3254 break;
3255 case 2: /* EOR */
3256 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3257 break;
3258 case 4: /* BIC */
3259 case 7: /* BICS */
3260 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3261 break;
3262 case 5: /* ORN */
3263 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3264 break;
3265 case 6: /* EON */
3266 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3267 break;
3268 default:
3269 assert(FALSE);
3270 break;
3273 if (!sf) {
3274 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3277 if (opc == 3) {
3278 gen_logic_CC(sf, tcg_rd);
3283 * C3.5.1 Add/subtract (extended register)
3285 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
3286 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3287 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
3288 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3290 * sf: 0 -> 32bit, 1 -> 64bit
3291 * op: 0 -> add , 1 -> sub
3292 * S: 1 -> set flags
3293 * opt: 00
3294 * option: extension type (see DecodeRegExtend)
3295 * imm3: optional shift to Rm
3297 * Rd = Rn + LSL(extend(Rm), amount)
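/* Illustrative example: ADD X0, SP, W1, UXTW #2 zero-extends W1, shifts it
 * left by imm3 = 2 and adds it to SP; the extended-register form is the one
 * that allows SP as the base (and as Rd when flags are not set), which is
 * why read_cpu_reg_sp()/cpu_reg_sp() are used below.
 */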
3299 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3301 int rd = extract32(insn, 0, 5);
3302 int rn = extract32(insn, 5, 5);
3303 int imm3 = extract32(insn, 10, 3);
3304 int option = extract32(insn, 13, 3);
3305 int rm = extract32(insn, 16, 5);
3306 bool setflags = extract32(insn, 29, 1);
3307 bool sub_op = extract32(insn, 30, 1);
3308 bool sf = extract32(insn, 31, 1);
3310 TCGv_i64 tcg_rm, tcg_rn; /* temps */
3311 TCGv_i64 tcg_rd;
3312 TCGv_i64 tcg_result;
3314 if (imm3 > 4) {
3315 unallocated_encoding(s);
3316 return;
3319 /* non-flag setting ops may use SP */
3320 if (!setflags) {
3321 tcg_rd = cpu_reg_sp(s, rd);
3322 } else {
3323 tcg_rd = cpu_reg(s, rd);
3325 tcg_rn = read_cpu_reg_sp(s, rn, sf);
3327 tcg_rm = read_cpu_reg(s, rm, sf);
3328 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3330 tcg_result = tcg_temp_new_i64();
3332 if (!setflags) {
3333 if (sub_op) {
3334 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3335 } else {
3336 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3338 } else {
3339 if (sub_op) {
3340 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3341 } else {
3342 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3346 if (sf) {
3347 tcg_gen_mov_i64(tcg_rd, tcg_result);
3348 } else {
3349 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3352 tcg_temp_free_i64(tcg_result);
3356 * C3.5.2 Add/subtract (shifted register)
3358 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3359 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3360 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
3361 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3363 * sf: 0 -> 32bit, 1 -> 64bit
3364 * op: 0 -> add , 1 -> sub
3365 * S: 1 -> set flags
3366 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3367 * imm6: Shift amount to apply to Rm before the add/sub
3369 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3371 int rd = extract32(insn, 0, 5);
3372 int rn = extract32(insn, 5, 5);
3373 int imm6 = extract32(insn, 10, 6);
3374 int rm = extract32(insn, 16, 5);
3375 int shift_type = extract32(insn, 22, 2);
3376 bool setflags = extract32(insn, 29, 1);
3377 bool sub_op = extract32(insn, 30, 1);
3378 bool sf = extract32(insn, 31, 1);
3380 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3381 TCGv_i64 tcg_rn, tcg_rm;
3382 TCGv_i64 tcg_result;
3384 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3385 unallocated_encoding(s);
3386 return;
3389 tcg_rn = read_cpu_reg(s, rn, sf);
3390 tcg_rm = read_cpu_reg(s, rm, sf);
3392 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3394 tcg_result = tcg_temp_new_i64();
3396 if (!setflags) {
3397 if (sub_op) {
3398 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3399 } else {
3400 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3402 } else {
3403 if (sub_op) {
3404 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3405 } else {
3406 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3410 if (sf) {
3411 tcg_gen_mov_i64(tcg_rd, tcg_result);
3412 } else {
3413 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3416 tcg_temp_free_i64(tcg_result);
3419 /* C3.5.9 Data-processing (3 source)
3421 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
3422 +--+------+-----------+------+------+----+------+------+------+
3423 |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
3424 +--+------+-----------+------+------+----+------+------+------+
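/* Illustrative example: SMADDL has sf:op54 = 100, op31 = 001, o0 = 0, so
 * op_id below is (0b100 << 4) | (0b001 << 1) | 0 = 0x42, matching the case
 * label; folding sf in means 32-bit and 64-bit MADD decode to different
 * op_id values (0x0 vs 0x40).
 */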
3427 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3429 int rd = extract32(insn, 0, 5);
3430 int rn = extract32(insn, 5, 5);
3431 int ra = extract32(insn, 10, 5);
3432 int rm = extract32(insn, 16, 5);
3433 int op_id = (extract32(insn, 29, 3) << 4) |
3434 (extract32(insn, 21, 3) << 1) |
3435 extract32(insn, 15, 1);
3436 bool sf = extract32(insn, 31, 1);
3437 bool is_sub = extract32(op_id, 0, 1);
3438 bool is_high = extract32(op_id, 2, 1);
3439 bool is_signed = false;
3440 TCGv_i64 tcg_op1;
3441 TCGv_i64 tcg_op2;
3442 TCGv_i64 tcg_tmp;
3444 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3445 switch (op_id) {
3446 case 0x42: /* SMADDL */
3447 case 0x43: /* SMSUBL */
3448 case 0x44: /* SMULH */
3449 is_signed = true;
3450 break;
3451 case 0x0: /* MADD (32bit) */
3452 case 0x1: /* MSUB (32bit) */
3453 case 0x40: /* MADD (64bit) */
3454 case 0x41: /* MSUB (64bit) */
3455 case 0x4a: /* UMADDL */
3456 case 0x4b: /* UMSUBL */
3457 case 0x4c: /* UMULH */
3458 break;
3459 default:
3460 unallocated_encoding(s);
3461 return;
3464 if (is_high) {
3465 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3466 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3467 TCGv_i64 tcg_rn = cpu_reg(s, rn);
3468 TCGv_i64 tcg_rm = cpu_reg(s, rm);
3470 if (is_signed) {
3471 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3472 } else {
3473 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3476 tcg_temp_free_i64(low_bits);
3477 return;
3480 tcg_op1 = tcg_temp_new_i64();
3481 tcg_op2 = tcg_temp_new_i64();
3482 tcg_tmp = tcg_temp_new_i64();
3484 if (op_id < 0x42) {
3485 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3486 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3487 } else {
3488 if (is_signed) {
3489 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3490 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3491 } else {
3492 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3493 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3497 if (ra == 31 && !is_sub) {
3498 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3499 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3500 } else {
3501 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3502 if (is_sub) {
3503 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3504 } else {
3505 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3509 if (!sf) {
3510 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3513 tcg_temp_free_i64(tcg_op1);
3514 tcg_temp_free_i64(tcg_op2);
3515 tcg_temp_free_i64(tcg_tmp);
3518 /* C3.5.3 - Add/subtract (with carry)
3519 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
3520 * +--+--+--+------------------------+------+---------+------+-----+
3521 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | opcode2 | Rn | Rd |
3522 * +--+--+--+------------------------+------+---------+------+-----+
3523 * [000000]
3526 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3528 unsigned int sf, op, setflags, rm, rn, rd;
3529 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3531 if (extract32(insn, 10, 6) != 0) {
3532 unallocated_encoding(s);
3533 return;
3536 sf = extract32(insn, 31, 1);
3537 op = extract32(insn, 30, 1);
3538 setflags = extract32(insn, 29, 1);
3539 rm = extract32(insn, 16, 5);
3540 rn = extract32(insn, 5, 5);
3541 rd = extract32(insn, 0, 5);
3543 tcg_rd = cpu_reg(s, rd);
3544 tcg_rn = cpu_reg(s, rn);
3546 if (op) {
3547 tcg_y = new_tmp_a64(s);
3548 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3549 } else {
3550 tcg_y = cpu_reg(s, rm);
3553 if (setflags) {
3554 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3555 } else {
3556 gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3560 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3561 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
3562 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3563 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
3564 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3565 * [1] y [0] [0]
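/* Illustrative example: CCMP X0, #5, #0x3, NE performs the flag-setting
 * subtract X0 - 5 when the NE condition holds and otherwise just sets NZCV
 * to the literal 0x3; those are exactly the two paths generated below
 * around label_match/label_continue.
 */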
3567 static void disas_cc(DisasContext *s, uint32_t insn)
3569 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3570 TCGLabel *label_continue = NULL;
3571 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3573 if (!extract32(insn, 29, 1)) {
3574 unallocated_encoding(s);
3575 return;
3577 if (insn & (1 << 10 | 1 << 4)) {
3578 unallocated_encoding(s);
3579 return;
3581 sf = extract32(insn, 31, 1);
3582 op = extract32(insn, 30, 1);
3583 is_imm = extract32(insn, 11, 1);
3584 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3585 cond = extract32(insn, 12, 4);
3586 rn = extract32(insn, 5, 5);
3587 nzcv = extract32(insn, 0, 4);
3589 if (cond < 0x0e) { /* not always */
3590 TCGLabel *label_match = gen_new_label();
3591 label_continue = gen_new_label();
3592 arm_gen_test_cc(cond, label_match);
3593 /* nomatch: */
3594 tcg_tmp = tcg_temp_new_i64();
3595 tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
3596 gen_set_nzcv(tcg_tmp);
3597 tcg_temp_free_i64(tcg_tmp);
3598 tcg_gen_br(label_continue);
3599 gen_set_label(label_match);
3601 /* match, or condition is always */
3602 if (is_imm) {
3603 tcg_y = new_tmp_a64(s);
3604 tcg_gen_movi_i64(tcg_y, y);
3605 } else {
3606 tcg_y = cpu_reg(s, y);
3608 tcg_rn = cpu_reg(s, rn);
3610 tcg_tmp = tcg_temp_new_i64();
3611 if (op) {
3612 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3613 } else {
3614 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3616 tcg_temp_free_i64(tcg_tmp);
3618 if (cond < 0x0e) { /* continue */
3619 gen_set_label(label_continue);
3623 /* C3.5.6 Conditional select
3624 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
3625 * +----+----+---+-----------------+------+------+-----+------+------+
3626 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
3627 * +----+----+---+-----------------+------+------+-----+------+------+
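/* Illustrative mapping of the aliases onto else_inv/else_inc below:
 * CSEL leaves the "else" value alone, CSINC (else_inc) adds one, CSINV
 * (else_inv) inverts it and CSNEG (both set) negates it; e.g.
 * CSINC X0, X1, X2, EQ gives X1 if EQ holds, else X2 + 1.
 */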
3629 static void disas_cond_select(DisasContext *s, uint32_t insn)
3631 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3632 TCGv_i64 tcg_rd, tcg_src;
3634 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3635 /* S == 1 or op2<1> == 1 */
3636 unallocated_encoding(s);
3637 return;
3639 sf = extract32(insn, 31, 1);
3640 else_inv = extract32(insn, 30, 1);
3641 rm = extract32(insn, 16, 5);
3642 cond = extract32(insn, 12, 4);
3643 else_inc = extract32(insn, 10, 1);
3644 rn = extract32(insn, 5, 5);
3645 rd = extract32(insn, 0, 5);
3647 if (rd == 31) {
3648 /* silly no-op write; until we use movcond we must special-case
3649 * this to avoid a dead temporary across basic blocks.
3651 return;
3654 tcg_rd = cpu_reg(s, rd);
3656 if (cond >= 0x0e) { /* condition "always" */
3657 tcg_src = read_cpu_reg(s, rn, sf);
3658 tcg_gen_mov_i64(tcg_rd, tcg_src);
3659 } else {
3660 /* OPTME: we could use movcond here, at the cost of duplicating
3661 * a lot of the arm_gen_test_cc() logic.
3663 TCGLabel *label_match = gen_new_label();
3664 TCGLabel *label_continue = gen_new_label();
3666 arm_gen_test_cc(cond, label_match);
3667 /* nomatch: */
3668 tcg_src = cpu_reg(s, rm);
3670 if (else_inv && else_inc) {
3671 tcg_gen_neg_i64(tcg_rd, tcg_src);
3672 } else if (else_inv) {
3673 tcg_gen_not_i64(tcg_rd, tcg_src);
3674 } else if (else_inc) {
3675 tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
3676 } else {
3677 tcg_gen_mov_i64(tcg_rd, tcg_src);
3679 if (!sf) {
3680 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3682 tcg_gen_br(label_continue);
3683 /* match: */
3684 gen_set_label(label_match);
3685 tcg_src = read_cpu_reg(s, rn, sf);
3686 tcg_gen_mov_i64(tcg_rd, tcg_src);
3687 /* continue: */
3688 gen_set_label(label_continue);
3692 static void handle_clz(DisasContext *s, unsigned int sf,
3693 unsigned int rn, unsigned int rd)
3695 TCGv_i64 tcg_rd, tcg_rn;
3696 tcg_rd = cpu_reg(s, rd);
3697 tcg_rn = cpu_reg(s, rn);
3699 if (sf) {
3700 gen_helper_clz64(tcg_rd, tcg_rn);
3701 } else {
3702 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3703 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3704 gen_helper_clz(tcg_tmp32, tcg_tmp32);
3705 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3706 tcg_temp_free_i32(tcg_tmp32);
3710 static void handle_cls(DisasContext *s, unsigned int sf,
3711 unsigned int rn, unsigned int rd)
3713 TCGv_i64 tcg_rd, tcg_rn;
3714 tcg_rd = cpu_reg(s, rd);
3715 tcg_rn = cpu_reg(s, rn);
3717 if (sf) {
3718 gen_helper_cls64(tcg_rd, tcg_rn);
3719 } else {
3720 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3721 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3722 gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3723 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3724 tcg_temp_free_i32(tcg_tmp32);
3728 static void handle_rbit(DisasContext *s, unsigned int sf,
3729 unsigned int rn, unsigned int rd)
3731 TCGv_i64 tcg_rd, tcg_rn;
3732 tcg_rd = cpu_reg(s, rd);
3733 tcg_rn = cpu_reg(s, rn);
3735 if (sf) {
3736 gen_helper_rbit64(tcg_rd, tcg_rn);
3737 } else {
3738 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3739 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3740 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3741 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3742 tcg_temp_free_i32(tcg_tmp32);
3746 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3747 static void handle_rev64(DisasContext *s, unsigned int sf,
3748 unsigned int rn, unsigned int rd)
3750 if (!sf) {
3751 unallocated_encoding(s);
3752 return;
3754 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3757 /* C5.6.149 REV with sf==0, opcode==2
3758 * C5.6.151 REV32 (sf==1, opcode==2)
3760 static void handle_rev32(DisasContext *s, unsigned int sf,
3761 unsigned int rn, unsigned int rd)
3763 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3765 if (sf) {
3766 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3767 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3769 /* bswap32_i64 requires zero high word */
3770 tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3771 tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3772 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3773 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3774 tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3776 tcg_temp_free_i64(tcg_tmp);
3777 } else {
3778 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3779 tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3783 /* C5.6.150 REV16 (opcode==1) */
3784 static void handle_rev16(DisasContext *s, unsigned int sf,
3785 unsigned int rn, unsigned int rd)
3787 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3788 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3789 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3791 tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3792 tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3794 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3795 tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3796 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3797 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3799 if (sf) {
3800 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3801 tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3802 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3803 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3805 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3806 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3807 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3810 tcg_temp_free_i64(tcg_tmp);
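/* Illustrative example for handle_rev16() above: with sf = 1 an input of
 * 0x1122334455667788 becomes 0x2211443366558877, i.e. the bytes are swapped
 * within each 16-bit halfword while the halfwords themselves stay in place.
 */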
3813 /* C3.5.7 Data-processing (1 source)
3814 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3815 * +----+---+---+-----------------+---------+--------+------+------+
3816 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
3817 * +----+---+---+-----------------+---------+--------+------+------+
3819 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3821 unsigned int sf, opcode, rn, rd;
3823 if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3824 unallocated_encoding(s);
3825 return;
3828 sf = extract32(insn, 31, 1);
3829 opcode = extract32(insn, 10, 6);
3830 rn = extract32(insn, 5, 5);
3831 rd = extract32(insn, 0, 5);
3833 switch (opcode) {
3834 case 0: /* RBIT */
3835 handle_rbit(s, sf, rn, rd);
3836 break;
3837 case 1: /* REV16 */
3838 handle_rev16(s, sf, rn, rd);
3839 break;
3840 case 2: /* REV32 */
3841 handle_rev32(s, sf, rn, rd);
3842 break;
3843 case 3: /* REV64 */
3844 handle_rev64(s, sf, rn, rd);
3845 break;
3846 case 4: /* CLZ */
3847 handle_clz(s, sf, rn, rd);
3848 break;
3849 case 5: /* CLS */
3850 handle_cls(s, sf, rn, rd);
3851 break;
3855 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3856 unsigned int rm, unsigned int rn, unsigned int rd)
3858 TCGv_i64 tcg_n, tcg_m, tcg_rd;
3859 tcg_rd = cpu_reg(s, rd);
3861 if (!sf && is_signed) {
3862 tcg_n = new_tmp_a64(s);
3863 tcg_m = new_tmp_a64(s);
3864 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3865 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3866 } else {
3867 tcg_n = read_cpu_reg(s, rn, sf);
3868 tcg_m = read_cpu_reg(s, rm, sf);
3871 if (is_signed) {
3872 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3873 } else {
3874 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3877 if (!sf) { /* zero extend final result */
3878 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3882 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3883 static void handle_shift_reg(DisasContext *s,
3884 enum a64_shift_type shift_type, unsigned int sf,
3885 unsigned int rm, unsigned int rn, unsigned int rd)
3887 TCGv_i64 tcg_shift = tcg_temp_new_i64();
3888 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3889 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3891 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3892 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3893 tcg_temp_free_i64(tcg_shift);
3896 /* CRC32[BHWX], CRC32C[BHWX] */
3897 static void handle_crc32(DisasContext *s,
3898 unsigned int sf, unsigned int sz, bool crc32c,
3899 unsigned int rm, unsigned int rn, unsigned int rd)
3901 TCGv_i64 tcg_acc, tcg_val;
3902 TCGv_i32 tcg_bytes;
3904 if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3905 || (sf == 1 && sz != 3)
3906 || (sf == 0 && sz == 3)) {
3907 unallocated_encoding(s);
3908 return;
3911 if (sz == 3) {
3912 tcg_val = cpu_reg(s, rm);
3913 } else {
3914 uint64_t mask;
3915 switch (sz) {
3916 case 0:
3917 mask = 0xFF;
3918 break;
3919 case 1:
3920 mask = 0xFFFF;
3921 break;
3922 case 2:
3923 mask = 0xFFFFFFFF;
3924 break;
3925 default:
3926 g_assert_not_reached();
3928 tcg_val = new_tmp_a64(s);
3929 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
3932 tcg_acc = cpu_reg(s, rn);
3933 tcg_bytes = tcg_const_i32(1 << sz);
3935 if (crc32c) {
3936 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3937 } else {
3938 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3941 tcg_temp_free_i32(tcg_bytes);
3944 /* C3.5.8 Data-processing (2 source)
3945 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3946 * +----+---+---+-----------------+------+--------+------+------+
3947 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
3948 * +----+---+---+-----------------+------+--------+------+------+
3950 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
3952 unsigned int sf, rm, opcode, rn, rd;
3953 sf = extract32(insn, 31, 1);
3954 rm = extract32(insn, 16, 5);
3955 opcode = extract32(insn, 10, 6);
3956 rn = extract32(insn, 5, 5);
3957 rd = extract32(insn, 0, 5);
3959 if (extract32(insn, 29, 1)) {
3960 unallocated_encoding(s);
3961 return;
3964 switch (opcode) {
3965 case 2: /* UDIV */
3966 handle_div(s, false, sf, rm, rn, rd);
3967 break;
3968 case 3: /* SDIV */
3969 handle_div(s, true, sf, rm, rn, rd);
3970 break;
3971 case 8: /* LSLV */
3972 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
3973 break;
3974 case 9: /* LSRV */
3975 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
3976 break;
3977 case 10: /* ASRV */
3978 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
3979 break;
3980 case 11: /* RORV */
3981 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
3982 break;
3983 case 16:
3984 case 17:
3985 case 18:
3986 case 19:
3987 case 20:
3988 case 21:
3989 case 22:
3990 case 23: /* CRC32 */
3992 int sz = extract32(opcode, 0, 2);
3993 bool crc32c = extract32(opcode, 2, 1);
3994 handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
3995 break;
3997 default:
3998 unallocated_encoding(s);
3999 break;
4003 /* C3.5 Data processing - register */
4004 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4006 switch (extract32(insn, 24, 5)) {
4007 case 0x0a: /* Logical (shifted register) */
4008 disas_logic_reg(s, insn);
4009 break;
4010 case 0x0b: /* Add/subtract */
4011 if (insn & (1 << 21)) { /* (extended register) */
4012 disas_add_sub_ext_reg(s, insn);
4013 } else {
4014 disas_add_sub_reg(s, insn);
4016 break;
4017 case 0x1b: /* Data-processing (3 source) */
4018 disas_data_proc_3src(s, insn);
4019 break;
4020 case 0x1a:
4021 switch (extract32(insn, 21, 3)) {
4022 case 0x0: /* Add/subtract (with carry) */
4023 disas_adc_sbc(s, insn);
4024 break;
4025 case 0x2: /* Conditional compare */
4026 disas_cc(s, insn); /* both imm and reg forms */
4027 break;
4028 case 0x4: /* Conditional select */
4029 disas_cond_select(s, insn);
4030 break;
4031 case 0x6: /* Data-processing */
4032 if (insn & (1 << 30)) { /* (1 source) */
4033 disas_data_proc_1src(s, insn);
4034 } else { /* (2 source) */
4035 disas_data_proc_2src(s, insn);
4037 break;
4038 default:
4039 unallocated_encoding(s);
4040 break;
4042 break;
4043 default:
4044 unallocated_encoding(s);
4045 break;
4049 static void handle_fp_compare(DisasContext *s, bool is_double,
4050 unsigned int rn, unsigned int rm,
4051 bool cmp_with_zero, bool signal_all_nans)
4053 TCGv_i64 tcg_flags = tcg_temp_new_i64();
4054 TCGv_ptr fpst = get_fpstatus_ptr();
4056 if (is_double) {
4057 TCGv_i64 tcg_vn, tcg_vm;
4059 tcg_vn = read_fp_dreg(s, rn);
4060 if (cmp_with_zero) {
4061 tcg_vm = tcg_const_i64(0);
4062 } else {
4063 tcg_vm = read_fp_dreg(s, rm);
4065 if (signal_all_nans) {
4066 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4067 } else {
4068 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4070 tcg_temp_free_i64(tcg_vn);
4071 tcg_temp_free_i64(tcg_vm);
4072 } else {
4073 TCGv_i32 tcg_vn, tcg_vm;
4075 tcg_vn = read_fp_sreg(s, rn);
4076 if (cmp_with_zero) {
4077 tcg_vm = tcg_const_i32(0);
4078 } else {
4079 tcg_vm = read_fp_sreg(s, rm);
4081 if (signal_all_nans) {
4082 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4083 } else {
4084 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4086 tcg_temp_free_i32(tcg_vn);
4087 tcg_temp_free_i32(tcg_vm);
4090 tcg_temp_free_ptr(fpst);
4092 gen_set_nzcv(tcg_flags);
4094 tcg_temp_free_i64(tcg_flags);
4097 /* C3.6.22 Floating point compare
4098 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
4099 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4100 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
4101 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4103 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4105 unsigned int mos, type, rm, op, rn, opc, op2r;
4107 mos = extract32(insn, 29, 3);
4108 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4109 rm = extract32(insn, 16, 5);
4110 op = extract32(insn, 14, 2);
4111 rn = extract32(insn, 5, 5);
4112 opc = extract32(insn, 3, 2);
4113 op2r = extract32(insn, 0, 3);
4115 if (mos || op || op2r || type > 1) {
4116 unallocated_encoding(s);
4117 return;
4120 if (!fp_access_check(s)) {
4121 return;
4124 handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4127 /* C3.6.23 Floating point conditional compare
4128 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
4129 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4130 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
4131 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4133 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4135 unsigned int mos, type, rm, cond, rn, op, nzcv;
4136 TCGv_i64 tcg_flags;
4137 TCGLabel *label_continue = NULL;
4139 mos = extract32(insn, 29, 3);
4140 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4141 rm = extract32(insn, 16, 5);
4142 cond = extract32(insn, 12, 4);
4143 rn = extract32(insn, 5, 5);
4144 op = extract32(insn, 4, 1);
4145 nzcv = extract32(insn, 0, 4);
4147 if (mos || type > 1) {
4148 unallocated_encoding(s);
4149 return;
4152 if (!fp_access_check(s)) {
4153 return;
4156 if (cond < 0x0e) { /* not always */
4157 TCGLabel *label_match = gen_new_label();
4158 label_continue = gen_new_label();
4159 arm_gen_test_cc(cond, label_match);
4160 /* nomatch: */
4161 tcg_flags = tcg_const_i64(nzcv << 28);
4162 gen_set_nzcv(tcg_flags);
4163 tcg_temp_free_i64(tcg_flags);
4164 tcg_gen_br(label_continue);
4165 gen_set_label(label_match);
4168 handle_fp_compare(s, type, rn, rm, false, op);
4170 if (cond < 0x0e) {
4171 gen_set_label(label_continue);
4175 /* copy src FP register to dst FP register; type specifies single or double */
4176 static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
4178 if (type) {
4179 TCGv_i64 v = read_fp_dreg(s, src);
4180 write_fp_dreg(s, dst, v);
4181 tcg_temp_free_i64(v);
4182 } else {
4183 TCGv_i32 v = read_fp_sreg(s, src);
4184 write_fp_sreg(s, dst, v);
4185 tcg_temp_free_i32(v);
4189 /* C3.6.24 Floating point conditional select
4190 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4191 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4192 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
4193 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4195 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4197 unsigned int mos, type, rm, cond, rn, rd;
4198 TCGLabel *label_continue = NULL;
4200 mos = extract32(insn, 29, 3);
4201 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4202 rm = extract32(insn, 16, 5);
4203 cond = extract32(insn, 12, 4);
4204 rn = extract32(insn, 5, 5);
4205 rd = extract32(insn, 0, 5);
4207 if (mos || type > 1) {
4208 unallocated_encoding(s);
4209 return;
4212 if (!fp_access_check(s)) {
4213 return;
4216 if (cond < 0x0e) { /* not always */
4217 TCGLabel *label_match = gen_new_label();
4218 label_continue = gen_new_label();
4219 arm_gen_test_cc(cond, label_match);
4220 /* nomatch: */
4221 gen_mov_fp2fp(s, type, rd, rm);
4222 tcg_gen_br(label_continue);
4223 gen_set_label(label_match);
4226 gen_mov_fp2fp(s, type, rd, rn);
4228 if (cond < 0x0e) { /* continue */
4229 gen_set_label(label_continue);
4233 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4234 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4236 TCGv_ptr fpst;
4237 TCGv_i32 tcg_op;
4238 TCGv_i32 tcg_res;
4240 fpst = get_fpstatus_ptr();
4241 tcg_op = read_fp_sreg(s, rn);
4242 tcg_res = tcg_temp_new_i32();
4244 switch (opcode) {
4245 case 0x0: /* FMOV */
4246 tcg_gen_mov_i32(tcg_res, tcg_op);
4247 break;
4248 case 0x1: /* FABS */
4249 gen_helper_vfp_abss(tcg_res, tcg_op);
4250 break;
4251 case 0x2: /* FNEG */
4252 gen_helper_vfp_negs(tcg_res, tcg_op);
4253 break;
4254 case 0x3: /* FSQRT */
4255 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4256 break;
4257 case 0x8: /* FRINTN */
4258 case 0x9: /* FRINTP */
4259 case 0xa: /* FRINTM */
4260 case 0xb: /* FRINTZ */
4261 case 0xc: /* FRINTA */
4263 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4265 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4266 gen_helper_rints(tcg_res, tcg_op, fpst);
4268 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4269 tcg_temp_free_i32(tcg_rmode);
4270 break;
4272 case 0xe: /* FRINTX */
4273 gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4274 break;
4275 case 0xf: /* FRINTI */
4276 gen_helper_rints(tcg_res, tcg_op, fpst);
4277 break;
4278 default:
4279 abort();
4282 write_fp_sreg(s, rd, tcg_res);
4284 tcg_temp_free_ptr(fpst);
4285 tcg_temp_free_i32(tcg_op);
4286 tcg_temp_free_i32(tcg_res);
4289 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4290 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4292 TCGv_ptr fpst;
4293 TCGv_i64 tcg_op;
4294 TCGv_i64 tcg_res;
4296 fpst = get_fpstatus_ptr();
4297 tcg_op = read_fp_dreg(s, rn);
4298 tcg_res = tcg_temp_new_i64();
4300 switch (opcode) {
4301 case 0x0: /* FMOV */
4302 tcg_gen_mov_i64(tcg_res, tcg_op);
4303 break;
4304 case 0x1: /* FABS */
4305 gen_helper_vfp_absd(tcg_res, tcg_op);
4306 break;
4307 case 0x2: /* FNEG */
4308 gen_helper_vfp_negd(tcg_res, tcg_op);
4309 break;
4310 case 0x3: /* FSQRT */
4311 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4312 break;
4313 case 0x8: /* FRINTN */
4314 case 0x9: /* FRINTP */
4315 case 0xa: /* FRINTM */
4316 case 0xb: /* FRINTZ */
4317 case 0xc: /* FRINTA */
4319 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4321 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4322 gen_helper_rintd(tcg_res, tcg_op, fpst);
4324 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4325 tcg_temp_free_i32(tcg_rmode);
4326 break;
4328 case 0xe: /* FRINTX */
4329 gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4330 break;
4331 case 0xf: /* FRINTI */
4332 gen_helper_rintd(tcg_res, tcg_op, fpst);
4333 break;
4334 default:
4335 abort();
4338 write_fp_dreg(s, rd, tcg_res);
4340 tcg_temp_free_ptr(fpst);
4341 tcg_temp_free_i64(tcg_op);
4342 tcg_temp_free_i64(tcg_res);
4345 static void handle_fp_fcvt(DisasContext *s, int opcode,
4346 int rd, int rn, int dtype, int ntype)
4348 switch (ntype) {
4349 case 0x0:
4351 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4352 if (dtype == 1) {
4353 /* Single to double */
4354 TCGv_i64 tcg_rd = tcg_temp_new_i64();
4355 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4356 write_fp_dreg(s, rd, tcg_rd);
4357 tcg_temp_free_i64(tcg_rd);
4358 } else {
4359 /* Single to half */
4360 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4361 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4362 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4363 write_fp_sreg(s, rd, tcg_rd);
4364 tcg_temp_free_i32(tcg_rd);
4366 tcg_temp_free_i32(tcg_rn);
4367 break;
4369 case 0x1:
4371 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4372 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4373 if (dtype == 0) {
4374 /* Double to single */
4375 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4376 } else {
4377 /* Double to half */
4378 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4379 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4381 write_fp_sreg(s, rd, tcg_rd);
4382 tcg_temp_free_i32(tcg_rd);
4383 tcg_temp_free_i64(tcg_rn);
4384 break;
4386 case 0x3:
4388 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4389 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4390 if (dtype == 0) {
4391 /* Half to single */
4392 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4393 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4394 write_fp_sreg(s, rd, tcg_rd);
4395 tcg_temp_free_i32(tcg_rd);
4396 } else {
4397 /* Half to double */
4398 TCGv_i64 tcg_rd = tcg_temp_new_i64();
4399 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4400 write_fp_dreg(s, rd, tcg_rd);
4401 tcg_temp_free_i64(tcg_rd);
4403 tcg_temp_free_i32(tcg_rn);
4404 break;
4406 default:
4407 abort();
4411 /* C3.6.25 Floating point data-processing (1 source)
4412 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
4413 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4414 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
4415 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4417 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4419 int type = extract32(insn, 22, 2);
4420 int opcode = extract32(insn, 15, 6);
4421 int rn = extract32(insn, 5, 5);
4422 int rd = extract32(insn, 0, 5);
4424 switch (opcode) {
4425 case 0x4: case 0x5: case 0x7:
4427 /* FCVT between half, single and double precision */
4428 int dtype = extract32(opcode, 0, 2);
4429 if (type == 2 || dtype == type) {
4430 unallocated_encoding(s);
4431 return;
4433 if (!fp_access_check(s)) {
4434 return;
4437 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4438 break;
4440 case 0x0 ... 0x3:
4441 case 0x8 ... 0xc:
4442 case 0xe ... 0xf:
4443 /* 32-to-32 and 64-to-64 ops */
4444 switch (type) {
4445 case 0:
4446 if (!fp_access_check(s)) {
4447 return;
4450 handle_fp_1src_single(s, opcode, rd, rn);
4451 break;
4452 case 1:
4453 if (!fp_access_check(s)) {
4454 return;
4457 handle_fp_1src_double(s, opcode, rd, rn);
4458 break;
4459 default:
4460 unallocated_encoding(s);
4462 break;
4463 default:
4464 unallocated_encoding(s);
4465 break;
4469 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4470 static void handle_fp_2src_single(DisasContext *s, int opcode,
4471 int rd, int rn, int rm)
4473 TCGv_i32 tcg_op1;
4474 TCGv_i32 tcg_op2;
4475 TCGv_i32 tcg_res;
4476 TCGv_ptr fpst;
4478 tcg_res = tcg_temp_new_i32();
4479 fpst = get_fpstatus_ptr();
4480 tcg_op1 = read_fp_sreg(s, rn);
4481 tcg_op2 = read_fp_sreg(s, rm);
4483 switch (opcode) {
4484 case 0x0: /* FMUL */
4485 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4486 break;
4487 case 0x1: /* FDIV */
4488 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4489 break;
4490 case 0x2: /* FADD */
4491 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4492 break;
4493 case 0x3: /* FSUB */
4494 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4495 break;
4496 case 0x4: /* FMAX */
4497 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4498 break;
4499 case 0x5: /* FMIN */
4500 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4501 break;
4502 case 0x6: /* FMAXNM */
4503 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4504 break;
4505 case 0x7: /* FMINNM */
4506 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4507 break;
4508 case 0x8: /* FNMUL */
4509 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4510 gen_helper_vfp_negs(tcg_res, tcg_res);
4511 break;
4514 write_fp_sreg(s, rd, tcg_res);
4516 tcg_temp_free_ptr(fpst);
4517 tcg_temp_free_i32(tcg_op1);
4518 tcg_temp_free_i32(tcg_op2);
4519 tcg_temp_free_i32(tcg_res);
4522 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4523 static void handle_fp_2src_double(DisasContext *s, int opcode,
4524 int rd, int rn, int rm)
4526 TCGv_i64 tcg_op1;
4527 TCGv_i64 tcg_op2;
4528 TCGv_i64 tcg_res;
4529 TCGv_ptr fpst;
4531 tcg_res = tcg_temp_new_i64();
4532 fpst = get_fpstatus_ptr();
4533 tcg_op1 = read_fp_dreg(s, rn);
4534 tcg_op2 = read_fp_dreg(s, rm);
4536 switch (opcode) {
4537 case 0x0: /* FMUL */
4538 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4539 break;
4540 case 0x1: /* FDIV */
4541 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4542 break;
4543 case 0x2: /* FADD */
4544 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4545 break;
4546 case 0x3: /* FSUB */
4547 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4548 break;
4549 case 0x4: /* FMAX */
4550 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4551 break;
4552 case 0x5: /* FMIN */
4553 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4554 break;
4555 case 0x6: /* FMAXNM */
4556 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4557 break;
4558 case 0x7: /* FMINNM */
4559 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4560 break;
4561 case 0x8: /* FNMUL */
4562 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4563 gen_helper_vfp_negd(tcg_res, tcg_res);
4564 break;
4567 write_fp_dreg(s, rd, tcg_res);
4569 tcg_temp_free_ptr(fpst);
4570 tcg_temp_free_i64(tcg_op1);
4571 tcg_temp_free_i64(tcg_op2);
4572 tcg_temp_free_i64(tcg_res);
4575 /* C3.6.26 Floating point data-processing (2 source)
4576 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4577 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4578 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
4579 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4581 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4583 int type = extract32(insn, 22, 2);
4584 int rd = extract32(insn, 0, 5);
4585 int rn = extract32(insn, 5, 5);
4586 int rm = extract32(insn, 16, 5);
4587 int opcode = extract32(insn, 12, 4);
4589 if (opcode > 8) {
4590 unallocated_encoding(s);
4591 return;
4594 switch (type) {
4595 case 0:
4596 if (!fp_access_check(s)) {
4597 return;
4599 handle_fp_2src_single(s, opcode, rd, rn, rm);
4600 break;
4601 case 1:
4602 if (!fp_access_check(s)) {
4603 return;
4605 handle_fp_2src_double(s, opcode, rd, rn, rm);
4606 break;
4607 default:
4608 unallocated_encoding(s);
4612 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4613 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4614 int rd, int rn, int rm, int ra)
4616 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4617 TCGv_i32 tcg_res = tcg_temp_new_i32();
4618 TCGv_ptr fpst = get_fpstatus_ptr();
4620 tcg_op1 = read_fp_sreg(s, rn);
4621 tcg_op2 = read_fp_sreg(s, rm);
4622 tcg_op3 = read_fp_sreg(s, ra);
4624 /* These are fused multiply-add, and must be done as one
4625 * floating point operation with no rounding between the
4626 * multiplication and addition steps.
4627 * NB that doing the negations here as separate steps is
4628 * correct: an input NaN should come out with its sign bit
4629 * flipped if it is a negated input.
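/* Illustrative note (not part of the original source): with op1 = Rn,
 * op2 = Rm, op3 = Ra and muladd computing op1 * op2 + op3, the o1:o0
 * bits select
 *   o1=0 o0=0  FMADD:   Rn*Rm + Ra
 *   o1=0 o0=1  FMSUB:  -Rn*Rm + Ra
 *   o1=1 o0=0  FNMADD: -Rn*Rm - Ra
 *   o1=1 o0=1  FNMSUB:  Rn*Rm - Ra
 */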
4631 if (o1 == true) {
4632 gen_helper_vfp_negs(tcg_op3, tcg_op3);
4635 if (o0 != o1) {
4636 gen_helper_vfp_negs(tcg_op1, tcg_op1);
4639 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4641 write_fp_sreg(s, rd, tcg_res);
4643 tcg_temp_free_ptr(fpst);
4644 tcg_temp_free_i32(tcg_op1);
4645 tcg_temp_free_i32(tcg_op2);
4646 tcg_temp_free_i32(tcg_op3);
4647 tcg_temp_free_i32(tcg_res);
4650 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4651 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4652 int rd, int rn, int rm, int ra)
4654 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4655 TCGv_i64 tcg_res = tcg_temp_new_i64();
4656 TCGv_ptr fpst = get_fpstatus_ptr();
4658 tcg_op1 = read_fp_dreg(s, rn);
4659 tcg_op2 = read_fp_dreg(s, rm);
4660 tcg_op3 = read_fp_dreg(s, ra);
4662 /* These are fused multiply-add, and must be done as one
4663 * floating point operation with no rounding between the
4664 * multiplication and addition steps.
4665 * NB that doing the negations here as separate steps is
4666 * correct: an input NaN should come out with its sign bit
4667 * flipped if it is a negated input.
4669 if (o1 == true) {
4670 gen_helper_vfp_negd(tcg_op3, tcg_op3);
4673 if (o0 != o1) {
4674 gen_helper_vfp_negd(tcg_op1, tcg_op1);
4677 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4679 write_fp_dreg(s, rd, tcg_res);
4681 tcg_temp_free_ptr(fpst);
4682 tcg_temp_free_i64(tcg_op1);
4683 tcg_temp_free_i64(tcg_op2);
4684 tcg_temp_free_i64(tcg_op3);
4685 tcg_temp_free_i64(tcg_res);
4688 /* C3.6.27 Floating point data-processing (3 source)
4689 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
4690 * +---+---+---+-----------+------+----+------+----+------+------+------+
4691 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
4692 * +---+---+---+-----------+------+----+------+----+------+------+------+
4694 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4696 int type = extract32(insn, 22, 2);
4697 int rd = extract32(insn, 0, 5);
4698 int rn = extract32(insn, 5, 5);
4699 int ra = extract32(insn, 10, 5);
4700 int rm = extract32(insn, 16, 5);
4701 bool o0 = extract32(insn, 15, 1);
4702 bool o1 = extract32(insn, 21, 1);
4704 switch (type) {
4705 case 0:
4706 if (!fp_access_check(s)) {
4707 return;
4709 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4710 break;
4711 case 1:
4712 if (!fp_access_check(s)) {
4713 return;
4715 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4716 break;
4717 default:
4718 unallocated_encoding(s);
4722 /* C3.6.28 Floating point immediate
4723 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
4724 * +---+---+---+-----------+------+---+------------+-------+------+------+
4725 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
4726 * +---+---+---+-----------+------+---+------------+-------+------+------+
4728 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4730 int rd = extract32(insn, 0, 5);
4731 int imm8 = extract32(insn, 13, 8);
4732 int is_double = extract32(insn, 22, 2);
4733 uint64_t imm;
4734 TCGv_i64 tcg_res;
4736 if (is_double > 1) {
4737 unallocated_encoding(s);
4738 return;
4741 if (!fp_access_check(s)) {
4742 return;
4745 /* The imm8 encodes the sign bit, enough bits to represent
4746 * an exponent in the range 01....1xx to 10....0xx,
4747 * and the most significant 4 bits of the mantissa; see
4748 * VFPExpandImm() in the v8 ARM ARM.
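/* Worked example (illustrative, not from the original source):
 * imm8 = 0x70 (sign 0, bit 6 set, mantissa bits 110000) expands to
 * 0x3f800000 as a single or 0x3ff0000000000000 as a double, i.e. the
 * constant 1.0, matching VFPExpandImm().
 */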
4750 if (is_double) {
4751 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4752 (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4753 extract32(imm8, 0, 6);
4754 imm <<= 48;
4755 } else {
4756 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4757 (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4758 (extract32(imm8, 0, 6) << 3);
4759 imm <<= 16;
4762 tcg_res = tcg_const_i64(imm);
4763 write_fp_dreg(s, rd, tcg_res);
4764 tcg_temp_free_i64(tcg_res);
4767 /* Handle floating point <=> fixed point conversions. Note that we can
4768 * also deal with fp <=> integer conversions as a special case (scale == 64)
4769 * OPTME: consider handling that special case specially or at least skipping
4770 * the call to scalbn in the helpers for zero shifts.
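/* Illustrative note (not from the original source): the A64 scale field
 * holds 64 - fbits, so the shift computed below (64 - scale) is simply
 * the number of fractional bits; e.g. FCVTZS Wd, Sn, #4 encodes
 * scale = 60 and the helpers see a shift of 4, while the plain integer
 * conversions pass scale = 64 for a shift of 0.
 */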
4772 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4773 bool itof, int rmode, int scale, int sf, int type)
4775 bool is_signed = !(opcode & 1);
4776 bool is_double = type;
4777 TCGv_ptr tcg_fpstatus;
4778 TCGv_i32 tcg_shift;
4780 tcg_fpstatus = get_fpstatus_ptr();
4782 tcg_shift = tcg_const_i32(64 - scale);
4784 if (itof) {
4785 TCGv_i64 tcg_int = cpu_reg(s, rn);
4786 if (!sf) {
4787 TCGv_i64 tcg_extend = new_tmp_a64(s);
4789 if (is_signed) {
4790 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4791 } else {
4792 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4795 tcg_int = tcg_extend;
4798 if (is_double) {
4799 TCGv_i64 tcg_double = tcg_temp_new_i64();
4800 if (is_signed) {
4801 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4802 tcg_shift, tcg_fpstatus);
4803 } else {
4804 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4805 tcg_shift, tcg_fpstatus);
4807 write_fp_dreg(s, rd, tcg_double);
4808 tcg_temp_free_i64(tcg_double);
4809 } else {
4810 TCGv_i32 tcg_single = tcg_temp_new_i32();
4811 if (is_signed) {
4812 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4813 tcg_shift, tcg_fpstatus);
4814 } else {
4815 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4816 tcg_shift, tcg_fpstatus);
4818 write_fp_sreg(s, rd, tcg_single);
4819 tcg_temp_free_i32(tcg_single);
4821 } else {
4822 TCGv_i64 tcg_int = cpu_reg(s, rd);
4823 TCGv_i32 tcg_rmode;
4825 if (extract32(opcode, 2, 1)) {
4826 /* There are too many rounding modes to all fit into rmode,
4827 * so FCVTA[US] is a special case.
4829 rmode = FPROUNDING_TIEAWAY;
4832 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4834 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4836 if (is_double) {
4837 TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4838 if (is_signed) {
4839 if (!sf) {
4840 gen_helper_vfp_tosld(tcg_int, tcg_double,
4841 tcg_shift, tcg_fpstatus);
4842 } else {
4843 gen_helper_vfp_tosqd(tcg_int, tcg_double,
4844 tcg_shift, tcg_fpstatus);
4846 } else {
4847 if (!sf) {
4848 gen_helper_vfp_tould(tcg_int, tcg_double,
4849 tcg_shift, tcg_fpstatus);
4850 } else {
4851 gen_helper_vfp_touqd(tcg_int, tcg_double,
4852 tcg_shift, tcg_fpstatus);
4855 tcg_temp_free_i64(tcg_double);
4856 } else {
4857 TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4858 if (sf) {
4859 if (is_signed) {
4860 gen_helper_vfp_tosqs(tcg_int, tcg_single,
4861 tcg_shift, tcg_fpstatus);
4862 } else {
4863 gen_helper_vfp_touqs(tcg_int, tcg_single,
4864 tcg_shift, tcg_fpstatus);
4866 } else {
4867 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4868 if (is_signed) {
4869 gen_helper_vfp_tosls(tcg_dest, tcg_single,
4870 tcg_shift, tcg_fpstatus);
4871 } else {
4872 gen_helper_vfp_touls(tcg_dest, tcg_single,
4873 tcg_shift, tcg_fpstatus);
4875 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4876 tcg_temp_free_i32(tcg_dest);
4878 tcg_temp_free_i32(tcg_single);
4881 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4882 tcg_temp_free_i32(tcg_rmode);
4884 if (!sf) {
4885 tcg_gen_ext32u_i64(tcg_int, tcg_int);
4889 tcg_temp_free_ptr(tcg_fpstatus);
4890 tcg_temp_free_i32(tcg_shift);
4893 /* C3.6.29 Floating point <-> fixed point conversions
4894 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
4895 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4896 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
4897 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4899 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4901 int rd = extract32(insn, 0, 5);
4902 int rn = extract32(insn, 5, 5);
4903 int scale = extract32(insn, 10, 6);
4904 int opcode = extract32(insn, 16, 3);
4905 int rmode = extract32(insn, 19, 2);
4906 int type = extract32(insn, 22, 2);
4907 bool sbit = extract32(insn, 29, 1);
4908 bool sf = extract32(insn, 31, 1);
4909 bool itof;
4911 if (sbit || (type > 1)
4912 || (!sf && scale < 32)) {
4913 unallocated_encoding(s);
4914 return;
4917 switch ((rmode << 3) | opcode) {
4918 case 0x2: /* SCVTF */
4919 case 0x3: /* UCVTF */
4920 itof = true;
4921 break;
4922 case 0x18: /* FCVTZS */
4923 case 0x19: /* FCVTZU */
4924 itof = false;
4925 break;
4926 default:
4927 unallocated_encoding(s);
4928 return;
4931 if (!fp_access_check(s)) {
4932 return;
4935 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
4938 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
4940 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
4941 * without conversion.
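/* Illustrative note (not from the original source): type 0 moves
 * W <-> S (32 bit), type 1 moves X <-> D (64 bit), and type 2 moves
 * X <-> the upper 64 bits of the quad register (Vn.D[1]).
 */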
4944 if (itof) {
4945 TCGv_i64 tcg_rn = cpu_reg(s, rn);
4947 switch (type) {
4948 case 0:
4950 /* 32 bit */
4951 TCGv_i64 tmp = tcg_temp_new_i64();
4952 tcg_gen_ext32u_i64(tmp, tcg_rn);
4953 tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
4954 tcg_gen_movi_i64(tmp, 0);
4955 tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
4956 tcg_temp_free_i64(tmp);
4957 break;
4959 case 1:
4961 /* 64 bit */
4962 TCGv_i64 tmp = tcg_const_i64(0);
4963 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
4964 tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
4965 tcg_temp_free_i64(tmp);
4966 break;
4968 case 2:
4969 /* 64 bit to top half. */
4970 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
4971 break;
4973 } else {
4974 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4976 switch (type) {
4977 case 0:
4978 /* 32 bit */
4979 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
4980 break;
4981 case 1:
4982 /* 64 bit */
4983 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
4984 break;
4985 case 2:
4986 /* 64 bits from top half */
4987 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
4988 break;
4993 /* C3.6.30 Floating point <-> integer conversions
4994 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
4995 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4996 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
4997 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4999 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5001 int rd = extract32(insn, 0, 5);
5002 int rn = extract32(insn, 5, 5);
5003 int opcode = extract32(insn, 16, 3);
5004 int rmode = extract32(insn, 19, 2);
5005 int type = extract32(insn, 22, 2);
5006 bool sbit = extract32(insn, 29, 1);
5007 bool sf = extract32(insn, 31, 1);
5009 if (sbit) {
5010 unallocated_encoding(s);
5011 return;
5014 if (opcode > 5) {
5015 /* FMOV */
5016 bool itof = opcode & 1;
5018 if (rmode >= 2) {
5019 unallocated_encoding(s);
5020 return;
5023 switch (sf << 3 | type << 1 | rmode) {
5024 case 0x0: /* 32 bit */
5025 case 0xa: /* 64 bit */
5026 case 0xd: /* 64 bit to top half of quad */
5027 break;
5028 default:
5029 /* all other sf/type/rmode combinations are invalid */
5030 unallocated_encoding(s);
5031 return;
5034 if (!fp_access_check(s)) {
5035 return;
5037 handle_fmov(s, rd, rn, type, itof);
5038 } else {
5039 /* actual FP conversions */
5040 bool itof = extract32(opcode, 1, 1);
5042 if (type > 1 || (rmode != 0 && opcode > 1)) {
5043 unallocated_encoding(s);
5044 return;
5047 if (!fp_access_check(s)) {
5048 return;
5050 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5054 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5055 * 31 30 29 28 25 24 0
5056 * +---+---+---+---------+-----------------------------+
5057 * | | 0 | | 1 1 1 1 | |
5058 * +---+---+---+---------+-----------------------------+
5060 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5062 if (extract32(insn, 24, 1)) {
5063 /* Floating point data-processing (3 source) */
5064 disas_fp_3src(s, insn);
5065 } else if (extract32(insn, 21, 1) == 0) {
5066 /* Floating point to fixed point conversions */
5067 disas_fp_fixed_conv(s, insn);
5068 } else {
5069 switch (extract32(insn, 10, 2)) {
5070 case 1:
5071 /* Floating point conditional compare */
5072 disas_fp_ccomp(s, insn);
5073 break;
5074 case 2:
5075 /* Floating point data-processing (2 source) */
5076 disas_fp_2src(s, insn);
5077 break;
5078 case 3:
5079 /* Floating point conditional select */
5080 disas_fp_csel(s, insn);
5081 break;
5082 case 0:
5083 switch (ctz32(extract32(insn, 12, 4))) {
5084 case 0: /* [15:12] == xxx1 */
5085 /* Floating point immediate */
5086 disas_fp_imm(s, insn);
5087 break;
5088 case 1: /* [15:12] == xx10 */
5089 /* Floating point compare */
5090 disas_fp_compare(s, insn);
5091 break;
5092 case 2: /* [15:12] == x100 */
5093 /* Floating point data-processing (1 source) */
5094 disas_fp_1src(s, insn);
5095 break;
5096 case 3: /* [15:12] == 1000 */
5097 unallocated_encoding(s);
5098 break;
5099 default: /* [15:12] == 0000 */
5100 /* Floating point <-> integer conversions */
5101 disas_fp_int_conv(s, insn);
5102 break;
5104 break;
5109 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5110 int pos)
5112 /* Extract 64 bits from the middle of two concatenated 64 bit
5113 * vector register slices left:right. The extracted bits start
5114 * at 'pos' bits into the right (least significant) side.
5115 * We return the result in tcg_right, and guarantee not to
5116 * trash tcg_left.
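/* Worked example (illustrative, not from the original source): with
 * pos = 24 the result is (right >> 24) | (left << 40), so the low 40
 * bits of the output come from the top of 'right' and the high 24 bits
 * from the bottom of 'left'.
 */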
5118 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5119 assert(pos > 0 && pos < 64);
5121 tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5122 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5123 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5125 tcg_temp_free_i64(tcg_tmp);
5128 /* C3.6.1 EXT
5129 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
5130 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5131 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
5132 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5134 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5136 int is_q = extract32(insn, 30, 1);
5137 int op2 = extract32(insn, 22, 2);
5138 int imm4 = extract32(insn, 11, 4);
5139 int rm = extract32(insn, 16, 5);
5140 int rn = extract32(insn, 5, 5);
5141 int rd = extract32(insn, 0, 5);
5142 int pos = imm4 << 3;
5143 TCGv_i64 tcg_resl, tcg_resh;
5145 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5146 unallocated_encoding(s);
5147 return;
5150 if (!fp_access_check(s)) {
5151 return;
5154 tcg_resh = tcg_temp_new_i64();
5155 tcg_resl = tcg_temp_new_i64();
5157 /* Vd gets bits starting at pos bits into Vm:Vn. This is
5158 * either extracting 128 bits from a 128:128 concatenation, or
5159 * extracting 64 bits from a 64:64 concatenation.
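/* Worked example (illustrative, not from the original source):
 * EXT Vd.16B, Vn.16B, Vm.16B, #3 has pos = 24, so Vd receives bytes
 * 3..15 of Vn followed by bytes 0..2 of Vm.
 */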
5161 if (!is_q) {
5162 read_vec_element(s, tcg_resl, rn, 0, MO_64);
5163 if (pos != 0) {
5164 read_vec_element(s, tcg_resh, rm, 0, MO_64);
5165 do_ext64(s, tcg_resh, tcg_resl, pos);
5167 tcg_gen_movi_i64(tcg_resh, 0);
5168 } else {
5169 TCGv_i64 tcg_hh;
5170 typedef struct {
5171 int reg;
5172 int elt;
5173 } EltPosns;
5174 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5175 EltPosns *elt = eltposns;
5177 if (pos >= 64) {
5178 elt++;
5179 pos -= 64;
5182 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5183 elt++;
5184 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5185 elt++;
5186 if (pos != 0) {
5187 do_ext64(s, tcg_resh, tcg_resl, pos);
5188 tcg_hh = tcg_temp_new_i64();
5189 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5190 do_ext64(s, tcg_hh, tcg_resh, pos);
5191 tcg_temp_free_i64(tcg_hh);
5195 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5196 tcg_temp_free_i64(tcg_resl);
5197 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5198 tcg_temp_free_i64(tcg_resh);
5201 /* C3.6.2 TBL/TBX
5202 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
5203 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5204 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
5205 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5207 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5209 int op2 = extract32(insn, 22, 2);
5210 int is_q = extract32(insn, 30, 1);
5211 int rm = extract32(insn, 16, 5);
5212 int rn = extract32(insn, 5, 5);
5213 int rd = extract32(insn, 0, 5);
5214 int is_tblx = extract32(insn, 12, 1);
5215 int len = extract32(insn, 13, 2);
5216 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5217 TCGv_i32 tcg_regno, tcg_numregs;
5219 if (op2 != 0) {
5220 unallocated_encoding(s);
5221 return;
5224 if (!fp_access_check(s)) {
5225 return;
5228 /* This does a table lookup: for every byte element in the input
5229 * we index into a table formed from up to four vector registers,
5230 * and then the output is the result of the lookups. Our helper
5231 * function does the lookup operation for a single 64 bit part of
5232 * the input.
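/* Illustrative note (not from the original source): with len = 1 the
 * table is the 32 bytes of Vn and V((n+1) MOD 32); index bytes >= 32
 * yield 0 for TBL, while TBX leaves the corresponding destination byte
 * unchanged, hence the read of Vd below when is_tblx is set.
 */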
5234 tcg_resl = tcg_temp_new_i64();
5235 tcg_resh = tcg_temp_new_i64();
5237 if (is_tblx) {
5238 read_vec_element(s, tcg_resl, rd, 0, MO_64);
5239 } else {
5240 tcg_gen_movi_i64(tcg_resl, 0);
5242 if (is_tblx && is_q) {
5243 read_vec_element(s, tcg_resh, rd, 1, MO_64);
5244 } else {
5245 tcg_gen_movi_i64(tcg_resh, 0);
5248 tcg_idx = tcg_temp_new_i64();
5249 tcg_regno = tcg_const_i32(rn);
5250 tcg_numregs = tcg_const_i32(len + 1);
5251 read_vec_element(s, tcg_idx, rm, 0, MO_64);
5252 gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5253 tcg_regno, tcg_numregs);
5254 if (is_q) {
5255 read_vec_element(s, tcg_idx, rm, 1, MO_64);
5256 gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5257 tcg_regno, tcg_numregs);
5259 tcg_temp_free_i64(tcg_idx);
5260 tcg_temp_free_i32(tcg_regno);
5261 tcg_temp_free_i32(tcg_numregs);
5263 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5264 tcg_temp_free_i64(tcg_resl);
5265 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5266 tcg_temp_free_i64(tcg_resh);
5269 /* C3.6.3 ZIP/UZP/TRN
5270 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
5271 * +---+---+-------------+------+---+------+---+------------------+------+
5272 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
5273 * +---+---+-------------+------+---+------+---+------------------+------+
5275 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5277 int rd = extract32(insn, 0, 5);
5278 int rn = extract32(insn, 5, 5);
5279 int rm = extract32(insn, 16, 5);
5280 int size = extract32(insn, 22, 2);
5281 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5282 * bit 2 indicates 1 vs 2 variant of the insn.
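/* For example (illustrative, not from the original source):
 * ZIP1 Vd.8B, Vn.8B, Vm.8B yields d = { n[0], m[0], n[1], m[1], ... }.
 */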
5284 int opcode = extract32(insn, 12, 2);
5285 bool part = extract32(insn, 14, 1);
5286 bool is_q = extract32(insn, 30, 1);
5287 int esize = 8 << size;
5288 int i, ofs;
5289 int datasize = is_q ? 128 : 64;
5290 int elements = datasize / esize;
5291 TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5293 if (opcode == 0 || (size == 3 && !is_q)) {
5294 unallocated_encoding(s);
5295 return;
5298 if (!fp_access_check(s)) {
5299 return;
5302 tcg_resl = tcg_const_i64(0);
5303 tcg_resh = tcg_const_i64(0);
5304 tcg_res = tcg_temp_new_i64();
5306 for (i = 0; i < elements; i++) {
5307 switch (opcode) {
5308 case 1: /* UZP1/2 */
5310 int midpoint = elements / 2;
5311 if (i < midpoint) {
5312 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5313 } else {
5314 read_vec_element(s, tcg_res, rm,
5315 2 * (i - midpoint) + part, size);
5317 break;
5319 case 2: /* TRN1/2 */
5320 if (i & 1) {
5321 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5322 } else {
5323 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5325 break;
5326 case 3: /* ZIP1/2 */
5328 int base = part * elements / 2;
5329 if (i & 1) {
5330 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5331 } else {
5332 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5334 break;
5336 default:
5337 g_assert_not_reached();
5340 ofs = i * esize;
5341 if (ofs < 64) {
5342 tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5343 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5344 } else {
5345 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5346 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5350 tcg_temp_free_i64(tcg_res);
5352 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5353 tcg_temp_free_i64(tcg_resl);
5354 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5355 tcg_temp_free_i64(tcg_resh);
5358 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5359 int opc, bool is_min, TCGv_ptr fpst)
5361 /* Helper function for disas_simd_across_lanes: do a single precision
5362 * min/max operation on the specified two inputs,
5363 * and return the result in tcg_elt1.
5365 if (opc == 0xc) {
5366 if (is_min) {
5367 gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5368 } else {
5369 gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5371 } else {
5372 assert(opc == 0xf);
5373 if (is_min) {
5374 gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5375 } else {
5376 gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5381 /* C3.6.4 AdvSIMD across lanes
5382 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
5383 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5384 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
5385 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5387 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5389 int rd = extract32(insn, 0, 5);
5390 int rn = extract32(insn, 5, 5);
5391 int size = extract32(insn, 22, 2);
5392 int opcode = extract32(insn, 12, 5);
5393 bool is_q = extract32(insn, 30, 1);
5394 bool is_u = extract32(insn, 29, 1);
5395 bool is_fp = false;
5396 bool is_min = false;
5397 int esize;
5398 int elements;
5399 int i;
5400 TCGv_i64 tcg_res, tcg_elt;
5402 switch (opcode) {
5403 case 0x1b: /* ADDV */
5404 if (is_u) {
5405 unallocated_encoding(s);
5406 return;
5408 /* fall through */
5409 case 0x3: /* SADDLV, UADDLV */
5410 case 0xa: /* SMAXV, UMAXV */
5411 case 0x1a: /* SMINV, UMINV */
5412 if (size == 3 || (size == 2 && !is_q)) {
5413 unallocated_encoding(s);
5414 return;
5416 break;
5417 case 0xc: /* FMAXNMV, FMINNMV */
5418 case 0xf: /* FMAXV, FMINV */
5419 if (!is_u || !is_q || extract32(size, 0, 1)) {
5420 unallocated_encoding(s);
5421 return;
5423 /* Bit 1 of size field encodes min vs max, and actual size is always
5424 * 32 bits: adjust the size variable so the following code can rely on it
5426 is_min = extract32(size, 1, 1);
5427 is_fp = true;
5428 size = 2;
5429 break;
5430 default:
5431 unallocated_encoding(s);
5432 return;
5435 if (!fp_access_check(s)) {
5436 return;
5439 esize = 8 << size;
5440 elements = (is_q ? 128 : 64) / esize;
5442 tcg_res = tcg_temp_new_i64();
5443 tcg_elt = tcg_temp_new_i64();
5445 /* These instructions operate across all lanes of a vector
5446 * to produce a single result. We can guarantee that a 64
5447 * bit intermediate is sufficient:
5448 * + for [US]ADDLV the maximum element size is 32 bits, and
5449 * the result type is 64 bits
5450 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5451 * same as the element size, which is 32 bits at most
5452 * For the integer operations we can choose to work at 64
5453 * or 32 bits and truncate at the end; for simplicity
5454 * we use 64 bits always. The floating point
5455 * ops do require 32 bit intermediates, though.
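/* Worked example (illustrative, not from the original source): for
 * ADDV Bd, Vn.16B with every byte 0xff the elements are read
 * sign-extended, the 64 bit intermediate accumulates -16, and the
 * truncation below leaves 0xf0 in Bd.
 */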
5457 if (!is_fp) {
5458 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5460 for (i = 1; i < elements; i++) {
5461 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5463 switch (opcode) {
5464 case 0x03: /* SADDLV / UADDLV */
5465 case 0x1b: /* ADDV */
5466 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5467 break;
5468 case 0x0a: /* SMAXV / UMAXV */
5469 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5470 tcg_res,
5471 tcg_res, tcg_elt, tcg_res, tcg_elt);
5472 break;
5473 case 0x1a: /* SMINV / UMINV */
5474 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5475 tcg_res,
5476 tcg_res, tcg_elt, tcg_res, tcg_elt);
5477 break;
5479 default:
5480 g_assert_not_reached();
5484 } else {
5485 /* Floating point ops which work on 32 bit (single) intermediates.
5486 * Note that correct NaN propagation requires that we do these
5487 * operations in exactly the order specified by the pseudocode.
5489 TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5490 TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5491 TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5492 TCGv_ptr fpst = get_fpstatus_ptr();
5494 assert(esize == 32);
5495 assert(elements == 4);
5497 read_vec_element(s, tcg_elt, rn, 0, MO_32);
5498 tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5499 read_vec_element(s, tcg_elt, rn, 1, MO_32);
5500 tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5502 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5504 read_vec_element(s, tcg_elt, rn, 2, MO_32);
5505 tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5506 read_vec_element(s, tcg_elt, rn, 3, MO_32);
5507 tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5509 do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5511 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5513 tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5514 tcg_temp_free_i32(tcg_elt1);
5515 tcg_temp_free_i32(tcg_elt2);
5516 tcg_temp_free_i32(tcg_elt3);
5517 tcg_temp_free_ptr(fpst);
5520 tcg_temp_free_i64(tcg_elt);
5522 /* Now truncate the result to the width required for the final output */
5523 if (opcode == 0x03) {
5524 /* SADDLV, UADDLV: result is 2*esize */
5525 size++;
5528 switch (size) {
5529 case 0:
5530 tcg_gen_ext8u_i64(tcg_res, tcg_res);
5531 break;
5532 case 1:
5533 tcg_gen_ext16u_i64(tcg_res, tcg_res);
5534 break;
5535 case 2:
5536 tcg_gen_ext32u_i64(tcg_res, tcg_res);
5537 break;
5538 case 3:
5539 break;
5540 default:
5541 g_assert_not_reached();
5544 write_fp_dreg(s, rd, tcg_res);
5545 tcg_temp_free_i64(tcg_res);
5548 /* C6.3.31 DUP (Element, Vector)
5550 * 31 30 29 21 20 16 15 10 9 5 4 0
5551 * +---+---+-------------------+--------+-------------+------+------+
5552 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5553 * +---+---+-------------------+--------+-------------+------+------+
5555 * size: encoded in imm5 (see ARM ARM LowestSetBit())
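/* For example (illustrative, not from the original source):
 * imm5 = 0b01100 gives size = 2 (32 bit elements) and index = 1,
 * i.e. DUP Vd.4S, Vn.S[1] when Q is set.
 */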
5557 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5558 int imm5)
5560 int size = ctz32(imm5);
5561 int esize = 8 << size;
5562 int elements = (is_q ? 128 : 64) / esize;
5563 int index, i;
5564 TCGv_i64 tmp;
5566 if (size > 3 || (size == 3 && !is_q)) {
5567 unallocated_encoding(s);
5568 return;
5571 if (!fp_access_check(s)) {
5572 return;
5575 index = imm5 >> (size + 1);
5577 tmp = tcg_temp_new_i64();
5578 read_vec_element(s, tmp, rn, index, size);
5580 for (i = 0; i < elements; i++) {
5581 write_vec_element(s, tmp, rd, i, size);
5584 if (!is_q) {
5585 clear_vec_high(s, rd);
5588 tcg_temp_free_i64(tmp);
5591 /* C6.3.31 DUP (element, scalar)
5592 * 31 21 20 16 15 10 9 5 4 0
5593 * +-----------------------+--------+-------------+------+------+
5594 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5595 * +-----------------------+--------+-------------+------+------+
5597 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5598 int imm5)
5600 int size = ctz32(imm5);
5601 int index;
5602 TCGv_i64 tmp;
5604 if (size > 3) {
5605 unallocated_encoding(s);
5606 return;
5609 if (!fp_access_check(s)) {
5610 return;
5613 index = imm5 >> (size + 1);
5615 /* This instruction just extracts the specified element and
5616 * zero-extends it into the bottom of the destination register.
5618 tmp = tcg_temp_new_i64();
5619 read_vec_element(s, tmp, rn, index, size);
5620 write_fp_dreg(s, rd, tmp);
5621 tcg_temp_free_i64(tmp);
5624 /* C6.3.32 DUP (General)
5626 * 31 30 29 21 20 16 15 10 9 5 4 0
5627 * +---+---+-------------------+--------+-------------+------+------+
5628 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
5629 * +---+---+-------------------+--------+-------------+------+------+
5631 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5633 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5634 int imm5)
5636 int size = ctz32(imm5);
5637 int esize = 8 << size;
5638 int elements = (is_q ? 128 : 64) / esize;
5639 int i = 0;
5641 if (size > 3 || ((size == 3) && !is_q)) {
5642 unallocated_encoding(s);
5643 return;
5646 if (!fp_access_check(s)) {
5647 return;
5650 for (i = 0; i < elements; i++) {
5651 write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5653 if (!is_q) {
5654 clear_vec_high(s, rd);
5658 /* C6.3.150 INS (Element)
5660 * 31 21 20 16 15 14 11 10 9 5 4 0
5661 * +-----------------------+--------+------------+---+------+------+
5662 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5663 * +-----------------------+--------+------------+---+------+------+
5665 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5666 * index: encoded in imm5<4:size+1>
5668 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5669 int imm4, int imm5)
5671 int size = ctz32(imm5);
5672 int src_index, dst_index;
5673 TCGv_i64 tmp;
5675 if (size > 3) {
5676 unallocated_encoding(s);
5677 return;
5680 if (!fp_access_check(s)) {
5681 return;
5684 dst_index = extract32(imm5, 1+size, 5);
5685 src_index = extract32(imm4, size, 4);
5687 tmp = tcg_temp_new_i64();
5689 read_vec_element(s, tmp, rn, src_index, size);
5690 write_vec_element(s, tmp, rd, dst_index, size);
5692 tcg_temp_free_i64(tmp);
5696 /* C6.3.151 INS (General)
5698 * 31 21 20 16 15 10 9 5 4 0
5699 * +-----------------------+--------+-------------+------+------+
5700 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
5701 * +-----------------------+--------+-------------+------+------+
5703 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5704 * index: encoded in imm5<4:size+1>
5706 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5708 int size = ctz32(imm5);
5709 int idx;
5711 if (size > 3) {
5712 unallocated_encoding(s);
5713 return;
5716 if (!fp_access_check(s)) {
5717 return;
5720 idx = extract32(imm5, 1 + size, 4 - size);
5721 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5725 * C6.3.321 UMOV (General)
5726 * C6.3.237 SMOV (General)
5728 * 31 30 29 21 20 16 15 12 10 9 5 4 0
5729 * +---+---+-------------------+--------+-------------+------+------+
5730 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
5731 * +---+---+-------------------+--------+-------------+------+------+
5733 * U: unsigned when set
5734 * size: encoded in imm5 (see ARM ARM LowestSetBit())
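/* For example (illustrative, not from the original source):
 * UMOV Wd, Vn.H[2] encodes imm5 = 0b01010, giving size = 1 (16 bit)
 * and element index 2.
 */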
5736 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5737 int rn, int rd, int imm5)
5739 int size = ctz32(imm5);
5740 int element;
5741 TCGv_i64 tcg_rd;
5743 /* Check for UnallocatedEncodings */
5744 if (is_signed) {
5745 if (size > 2 || (size == 2 && !is_q)) {
5746 unallocated_encoding(s);
5747 return;
5749 } else {
5750 if (size > 3
5751 || (size < 3 && is_q)
5752 || (size == 3 && !is_q)) {
5753 unallocated_encoding(s);
5754 return;
5758 if (!fp_access_check(s)) {
5759 return;
5762 element = extract32(imm5, 1+size, 4);
5764 tcg_rd = cpu_reg(s, rd);
5765 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5766 if (is_signed && !is_q) {
5767 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5771 /* C3.6.5 AdvSIMD copy
5772 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5773 * +---+---+----+-----------------+------+---+------+---+------+------+
5774 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5775 * +---+---+----+-----------------+------+---+------+---+------+------+
5777 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5779 int rd = extract32(insn, 0, 5);
5780 int rn = extract32(insn, 5, 5);
5781 int imm4 = extract32(insn, 11, 4);
5782 int op = extract32(insn, 29, 1);
5783 int is_q = extract32(insn, 30, 1);
5784 int imm5 = extract32(insn, 16, 5);
5786 if (op) {
5787 if (is_q) {
5788 /* INS (element) */
5789 handle_simd_inse(s, rd, rn, imm4, imm5);
5790 } else {
5791 unallocated_encoding(s);
5793 } else {
5794 switch (imm4) {
5795 case 0:
5796 /* DUP (element - vector) */
5797 handle_simd_dupe(s, is_q, rd, rn, imm5);
5798 break;
5799 case 1:
5800 /* DUP (general) */
5801 handle_simd_dupg(s, is_q, rd, rn, imm5);
5802 break;
5803 case 3:
5804 if (is_q) {
5805 /* INS (general) */
5806 handle_simd_insg(s, rd, rn, imm5);
5807 } else {
5808 unallocated_encoding(s);
5810 break;
5811 case 5:
5812 case 7:
5813 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5814 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5815 break;
5816 default:
5817 unallocated_encoding(s);
5818 break;
5823 /* C3.6.6 AdvSIMD modified immediate
5824 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
5825 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5826 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
5827 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5829 * There are a number of operations that can be carried out here:
5830 * MOVI - move (shifted) imm into register
5831 * MVNI - move inverted (shifted) imm into register
5832 * ORR - bitwise OR of (shifted) imm with register
5833 * BIC - bitwise clear of (shifted) imm with register
5835 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5837 int rd = extract32(insn, 0, 5);
5838 int cmode = extract32(insn, 12, 4);
5839 int cmode_3_1 = extract32(cmode, 1, 3);
5840 int cmode_0 = extract32(cmode, 0, 1);
5841 int o2 = extract32(insn, 11, 1);
5842 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5843 bool is_neg = extract32(insn, 29, 1);
5844 bool is_q = extract32(insn, 30, 1);
5845 uint64_t imm = 0;
5846 TCGv_i64 tcg_rd, tcg_imm;
5847 int i;
5849 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5850 unallocated_encoding(s);
5851 return;
5854 if (!fp_access_check(s)) {
5855 return;
5858 /* See AdvSIMDExpandImm() in ARM ARM */
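/* Worked example (illustrative, not from the original source):
 * cmode = 0b0010 (cmode_3_1 = 1) with abcdefgh = 0xab gives
 * imm = 0x0000ab000000ab00, i.e. imm8 shifted left 8 bits and
 * replicated into both 32 bit halves.
 */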
5859 switch (cmode_3_1) {
5860 case 0: /* Replicate(Zeros(24):imm8, 2) */
5861 case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5862 case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5863 case 3: /* Replicate(imm8:Zeros(24), 2) */
5865 int shift = cmode_3_1 * 8;
5866 imm = bitfield_replicate(abcdefgh << shift, 32);
5867 break;
5869 case 4: /* Replicate(Zeros(8):imm8, 4) */
5870 case 5: /* Replicate(imm8:Zeros(8), 4) */
5872 int shift = (cmode_3_1 & 0x1) * 8;
5873 imm = bitfield_replicate(abcdefgh << shift, 16);
5874 break;
5876 case 6:
5877 if (cmode_0) {
5878 /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5879 imm = (abcdefgh << 16) | 0xffff;
5880 } else {
5881 /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5882 imm = (abcdefgh << 8) | 0xff;
5884 imm = bitfield_replicate(imm, 32);
5885 break;
5886 case 7:
5887 if (!cmode_0 && !is_neg) {
5888 imm = bitfield_replicate(abcdefgh, 8);
5889 } else if (!cmode_0 && is_neg) {
5890 int i;
5891 imm = 0;
5892 for (i = 0; i < 8; i++) {
5893 if ((abcdefgh) & (1 << i)) {
5894 imm |= 0xffULL << (i * 8);
5897 } else if (cmode_0) {
5898 if (is_neg) {
5899 imm = (abcdefgh & 0x3f) << 48;
5900 if (abcdefgh & 0x80) {
5901 imm |= 0x8000000000000000ULL;
5903 if (abcdefgh & 0x40) {
5904 imm |= 0x3fc0000000000000ULL;
5905 } else {
5906 imm |= 0x4000000000000000ULL;
5908 } else {
5909 imm = (abcdefgh & 0x3f) << 19;
5910 if (abcdefgh & 0x80) {
5911 imm |= 0x80000000;
5913 if (abcdefgh & 0x40) {
5914 imm |= 0x3e000000;
5915 } else {
5916 imm |= 0x40000000;
5918 imm |= (imm << 32);
5921 break;
5924 if (cmode_3_1 != 7 && is_neg) {
5925 imm = ~imm;
5928 tcg_imm = tcg_const_i64(imm);
5929 tcg_rd = new_tmp_a64(s);
5931 for (i = 0; i < 2; i++) {
5932 int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
5934 if (i == 1 && !is_q) {
5935 /* non-quad ops clear high half of vector */
5936 tcg_gen_movi_i64(tcg_rd, 0);
5937 } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
5938 tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
5939 if (is_neg) {
5940 /* AND (BIC) */
5941 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
5942 } else {
5943 /* ORR */
5944 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
5946 } else {
5947 /* MOVI */
5948 tcg_gen_mov_i64(tcg_rd, tcg_imm);
5950 tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
5953 tcg_temp_free_i64(tcg_imm);
5956 /* C3.6.7 AdvSIMD scalar copy
5957 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5958 * +-----+----+-----------------+------+---+------+---+------+------+
5959 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5960 * +-----+----+-----------------+------+---+------+---+------+------+
5962 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
5964 int rd = extract32(insn, 0, 5);
5965 int rn = extract32(insn, 5, 5);
5966 int imm4 = extract32(insn, 11, 4);
5967 int imm5 = extract32(insn, 16, 5);
5968 int op = extract32(insn, 29, 1);
5970 if (op != 0 || imm4 != 0) {
5971 unallocated_encoding(s);
5972 return;
5975 /* DUP (element, scalar) */
5976 handle_simd_dupes(s, rd, rn, imm5);
5979 /* C3.6.8 AdvSIMD scalar pairwise
5980 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
5981 * +-----+---+-----------+------+-----------+--------+-----+------+------+
5982 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
5983 * +-----+---+-----------+------+-----------+--------+-----+------+------+
5985 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
5987 int u = extract32(insn, 29, 1);
5988 int size = extract32(insn, 22, 2);
5989 int opcode = extract32(insn, 12, 5);
5990 int rn = extract32(insn, 5, 5);
5991 int rd = extract32(insn, 0, 5);
5992 TCGv_ptr fpst;
5994 /* For some ops (the FP ones), size[1] is part of the encoding.
5995 * For ADDP strictly it is not but size[1] is always 1 for valid
5996 * encodings.
5998 opcode |= (extract32(size, 1, 1) << 5);
6000 switch (opcode) {
6001 case 0x3b: /* ADDP */
6002 if (u || size != 3) {
6003 unallocated_encoding(s);
6004 return;
6006 if (!fp_access_check(s)) {
6007 return;
6010 TCGV_UNUSED_PTR(fpst);
6011 break;
6012 case 0xc: /* FMAXNMP */
6013 case 0xd: /* FADDP */
6014 case 0xf: /* FMAXP */
6015 case 0x2c: /* FMINNMP */
6016 case 0x2f: /* FMINP */
6017 /* FP op, size[0] is 32 or 64 bit */
6018 if (!u) {
6019 unallocated_encoding(s);
6020 return;
6022 if (!fp_access_check(s)) {
6023 return;
6026 size = extract32(size, 0, 1) ? 3 : 2;
6027 fpst = get_fpstatus_ptr();
6028 break;
6029 default:
6030 unallocated_encoding(s);
6031 return;
6034 if (size == 3) {
6035 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6036 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6037 TCGv_i64 tcg_res = tcg_temp_new_i64();
6039 read_vec_element(s, tcg_op1, rn, 0, MO_64);
6040 read_vec_element(s, tcg_op2, rn, 1, MO_64);
6042 switch (opcode) {
6043 case 0x3b: /* ADDP */
6044 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6045 break;
6046 case 0xc: /* FMAXNMP */
6047 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6048 break;
6049 case 0xd: /* FADDP */
6050 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6051 break;
6052 case 0xf: /* FMAXP */
6053 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6054 break;
6055 case 0x2c: /* FMINNMP */
6056 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6057 break;
6058 case 0x2f: /* FMINP */
6059 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6060 break;
6061 default:
6062 g_assert_not_reached();
6065 write_fp_dreg(s, rd, tcg_res);
6067 tcg_temp_free_i64(tcg_op1);
6068 tcg_temp_free_i64(tcg_op2);
6069 tcg_temp_free_i64(tcg_res);
6070 } else {
6071 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6072 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6073 TCGv_i32 tcg_res = tcg_temp_new_i32();
6075 read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6076 read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6078 switch (opcode) {
6079 case 0xc: /* FMAXNMP */
6080 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6081 break;
6082 case 0xd: /* FADDP */
6083 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6084 break;
6085 case 0xf: /* FMAXP */
6086 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6087 break;
6088 case 0x2c: /* FMINNMP */
6089 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6090 break;
6091 case 0x2f: /* FMINP */
6092 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6093 break;
6094 default:
6095 g_assert_not_reached();
6098 write_fp_sreg(s, rd, tcg_res);
6100 tcg_temp_free_i32(tcg_op1);
6101 tcg_temp_free_i32(tcg_op2);
6102 tcg_temp_free_i32(tcg_res);
6105 if (!TCGV_IS_UNUSED_PTR(fpst)) {
6106 tcg_temp_free_ptr(fpst);
6111 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6113 * This code handles the common shifting logic and is used by both
6114 * the vector and scalar code.
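/* Worked example (illustrative, not from the original source):
 * URSHR with shift = 2 on the value 7 adds the rounding constant
 * 1 << (shift - 1) = 2 before shifting, giving (7 + 2) >> 2 = 2, where
 * a plain USHR would give 7 >> 2 = 1.
 */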
6116 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6117 TCGv_i64 tcg_rnd, bool accumulate,
6118 bool is_u, int size, int shift)
6120 bool extended_result = false;
6121 bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6122 int ext_lshift = 0;
6123 TCGv_i64 tcg_src_hi;
6125 if (round && size == 3) {
6126 extended_result = true;
6127 ext_lshift = 64 - shift;
6128 tcg_src_hi = tcg_temp_new_i64();
6129 } else if (shift == 64) {
6130 if (!accumulate && is_u) {
6131 /* result is zero */
6132 tcg_gen_movi_i64(tcg_res, 0);
6133 return;
6137 /* Deal with the rounding step */
6138 if (round) {
6139 if (extended_result) {
6140 TCGv_i64 tcg_zero = tcg_const_i64(0);
6141 if (!is_u) {
6142 /* take care of sign extending tcg_src */
6143 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6144 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6145 tcg_src, tcg_src_hi,
6146 tcg_rnd, tcg_zero);
6147 } else {
6148 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6149 tcg_src, tcg_zero,
6150 tcg_rnd, tcg_zero);
6152 tcg_temp_free_i64(tcg_zero);
6153 } else {
6154 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6158 /* Now do the shift right */
6159 if (round && extended_result) {
6160 /* extended case, >64 bit precision required */
6161 if (ext_lshift == 0) {
6162 /* special case, only high bits matter */
6163 tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6164 } else {
6165 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6166 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6167 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6169 } else {
6170 if (is_u) {
6171 if (shift == 64) {
6172 /* essentially shifting in 64 zeros */
6173 tcg_gen_movi_i64(tcg_src, 0);
6174 } else {
6175 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6177 } else {
6178 if (shift == 64) {
6179 /* effectively extending the sign-bit */
6180 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6181 } else {
6182 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6187 if (accumulate) {
6188 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6189 } else {
6190 tcg_gen_mov_i64(tcg_res, tcg_src);
6193 if (extended_result) {
6194 tcg_temp_free_i64(tcg_src_hi);
6198 /* Common SHL/SLI - Shift left with an optional insert */
6199 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6200 bool insert, int shift)
6202 if (insert) { /* SLI */
6203 tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6204 } else { /* SHL */
6205 tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6209 /* SRI: shift right with insert */
6210 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6211 int size, int shift)
6213 int esize = 8 << size;
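     /* SRI leaves the top 'shift' bits of each destination element unchanged
      * and inserts the shifted source into the low (esize - shift) bits.
      */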
6215 /* shift count same as element size is valid but does nothing;
6216 * special case to avoid potential shift by 64.
6218 if (shift != esize) {
6219 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6220 tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6224 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6225 static void handle_scalar_simd_shri(DisasContext *s,
6226 bool is_u, int immh, int immb,
6227 int opcode, int rn, int rd)
6229 const int size = 3;
6230 int immhb = immh << 3 | immb;
6231 int shift = 2 * (8 << size) - immhb;
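     /* The right-shift amount is encoded as (2 * esize) - immhb, so for the
      * 64-bit scalar forms immhb runs from 64 to 127, giving shifts of 64
      * down to 1; immh<3> must be set or the encoding is unallocated.
      */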
6232 bool accumulate = false;
6233 bool round = false;
6234 bool insert = false;
6235 TCGv_i64 tcg_rn;
6236 TCGv_i64 tcg_rd;
6237 TCGv_i64 tcg_round;
6239 if (!extract32(immh, 3, 1)) {
6240 unallocated_encoding(s);
6241 return;
6244 if (!fp_access_check(s)) {
6245 return;
6248 switch (opcode) {
6249 case 0x02: /* SSRA / USRA (accumulate) */
6250 accumulate = true;
6251 break;
6252 case 0x04: /* SRSHR / URSHR (rounding) */
6253 round = true;
6254 break;
6255 case 0x06: /* SRSRA / URSRA (accum + rounding) */
6256 accumulate = round = true;
6257 break;
6258 case 0x08: /* SRI */
6259 insert = true;
6260 break;
6263 if (round) {
6264 uint64_t round_const = 1ULL << (shift - 1);
6265 tcg_round = tcg_const_i64(round_const);
6266 } else {
6267 TCGV_UNUSED_I64(tcg_round);
6270 tcg_rn = read_fp_dreg(s, rn);
6271 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6273 if (insert) {
6274 handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6275 } else {
6276 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6277 accumulate, is_u, size, shift);
6280 write_fp_dreg(s, rd, tcg_rd);
6282 tcg_temp_free_i64(tcg_rn);
6283 tcg_temp_free_i64(tcg_rd);
6284 if (round) {
6285 tcg_temp_free_i64(tcg_round);
6289 /* SHL/SLI - Scalar shift left */
6290 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6291 int immh, int immb, int opcode,
6292 int rn, int rd)
6294 int size = 32 - clz32(immh) - 1;
6295 int immhb = immh << 3 | immb;
6296 int shift = immhb - (8 << size);
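     /* Left shifts are encoded the other way round: the shift amount is
      * immhb - esize, i.e. 0..esize-1 for the element size selected by the
      * leading one bit of immh.
      */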
6297 TCGv_i64 tcg_rn = new_tmp_a64(s);
6298 TCGv_i64 tcg_rd = new_tmp_a64(s);
6300 if (!extract32(immh, 3, 1)) {
6301 unallocated_encoding(s);
6302 return;
6305 if (!fp_access_check(s)) {
6306 return;
6309 tcg_rn = read_fp_dreg(s, rn);
6310 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6312 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6314 write_fp_dreg(s, rd, tcg_rd);
6316 tcg_temp_free_i64(tcg_rn);
6317 tcg_temp_free_i64(tcg_rd);
6320 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6321 * (signed/unsigned) narrowing */
6322 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6323 bool is_u_shift, bool is_u_narrow,
6324 int immh, int immb, int opcode,
6325 int rn, int rd)
6327 int immhb = immh << 3 | immb;
6328 int size = 32 - clz32(immh) - 1;
6329 int esize = 8 << size;
6330 int shift = (2 * esize) - immhb;
6331 int elements = is_scalar ? 1 : (64 / esize);
6332 bool round = extract32(opcode, 0, 1);
6333 TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6334 TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6335 TCGv_i32 tcg_rd_narrowed;
6336 TCGv_i64 tcg_final;
6338 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6339 { gen_helper_neon_narrow_sat_s8,
6340 gen_helper_neon_unarrow_sat8 },
6341 { gen_helper_neon_narrow_sat_s16,
6342 gen_helper_neon_unarrow_sat16 },
6343 { gen_helper_neon_narrow_sat_s32,
6344 gen_helper_neon_unarrow_sat32 },
6345 { NULL, NULL },
6347 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6348 gen_helper_neon_narrow_sat_u8,
6349 gen_helper_neon_narrow_sat_u16,
6350 gen_helper_neon_narrow_sat_u32,
6351 NULL
6353 NeonGenNarrowEnvFn *narrowfn;
6355 int i;
6357 assert(size < 4);
6359 if (extract32(immh, 3, 1)) {
6360 unallocated_encoding(s);
6361 return;
6364 if (!fp_access_check(s)) {
6365 return;
6368 if (is_u_shift) {
6369 narrowfn = unsigned_narrow_fns[size];
6370 } else {
6371 narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6374 tcg_rn = tcg_temp_new_i64();
6375 tcg_rd = tcg_temp_new_i64();
6376 tcg_rd_narrowed = tcg_temp_new_i32();
6377 tcg_final = tcg_const_i64(0);
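     /* Narrowed elements are packed into tcg_final and written back as one
      * 64-bit value: into the low half of Rd (with the high half cleared)
      * for the base forms, or into the high half for the "2" forms (is_q).
      */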
6379 if (round) {
6380 uint64_t round_const = 1ULL << (shift - 1);
6381 tcg_round = tcg_const_i64(round_const);
6382 } else {
6383 TCGV_UNUSED_I64(tcg_round);
6386 for (i = 0; i < elements; i++) {
6387 read_vec_element(s, tcg_rn, rn, i, ldop);
6388 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6389 false, is_u_shift, size+1, shift);
6390 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6391 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6392 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6395 if (!is_q) {
6396 clear_vec_high(s, rd);
6397 write_vec_element(s, tcg_final, rd, 0, MO_64);
6398 } else {
6399 write_vec_element(s, tcg_final, rd, 1, MO_64);
6402 if (round) {
6403 tcg_temp_free_i64(tcg_round);
6405 tcg_temp_free_i64(tcg_rn);
6406 tcg_temp_free_i64(tcg_rd);
6407 tcg_temp_free_i32(tcg_rd_narrowed);
6408 tcg_temp_free_i64(tcg_final);
6409 return;
6412 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6413 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6414 bool src_unsigned, bool dst_unsigned,
6415 int immh, int immb, int rn, int rd)
6417 int immhb = immh << 3 | immb;
6418 int size = 32 - clz32(immh) - 1;
6419 int shift = immhb - (8 << size);
6420 int pass;
6422 assert(immh != 0);
6423 assert(!(scalar && is_q));
6425 if (!scalar) {
6426 if (!is_q && extract32(immh, 3, 1)) {
6427 unallocated_encoding(s);
6428 return;
6431 /* Since we use the variable-shift helpers we must
6432 * replicate the shift count into each element of
6433 * the tcg_shift value.
6435 switch (size) {
6436 case 0:
6437 shift |= shift << 8;
6438 /* fall through */
6439 case 1:
6440 shift |= shift << 16;
6441 break;
6442 case 2:
6443 case 3:
6444 break;
6445 default:
6446 g_assert_not_reached();
6450 if (!fp_access_check(s)) {
6451 return;
6454 if (size == 3) {
6455 TCGv_i64 tcg_shift = tcg_const_i64(shift);
6456 static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6457 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6458 { NULL, gen_helper_neon_qshl_u64 },
6460 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6461 int maxpass = is_q ? 2 : 1;
6463 for (pass = 0; pass < maxpass; pass++) {
6464 TCGv_i64 tcg_op = tcg_temp_new_i64();
6466 read_vec_element(s, tcg_op, rn, pass, MO_64);
6467 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6468 write_vec_element(s, tcg_op, rd, pass, MO_64);
6470 tcg_temp_free_i64(tcg_op);
6472 tcg_temp_free_i64(tcg_shift);
6474 if (!is_q) {
6475 clear_vec_high(s, rd);
6477 } else {
6478 TCGv_i32 tcg_shift = tcg_const_i32(shift);
6479 static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6481 { gen_helper_neon_qshl_s8,
6482 gen_helper_neon_qshl_s16,
6483 gen_helper_neon_qshl_s32 },
6484 { gen_helper_neon_qshlu_s8,
6485 gen_helper_neon_qshlu_s16,
6486 gen_helper_neon_qshlu_s32 }
6487 }, {
6488 { NULL, NULL, NULL },
6489 { gen_helper_neon_qshl_u8,
6490 gen_helper_neon_qshl_u16,
6491 gen_helper_neon_qshl_u32 }
6494 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6495 TCGMemOp memop = scalar ? size : MO_32;
6496 int maxpass = scalar ? 1 : is_q ? 4 : 2;
6498 for (pass = 0; pass < maxpass; pass++) {
6499 TCGv_i32 tcg_op = tcg_temp_new_i32();
6501 read_vec_element_i32(s, tcg_op, rn, pass, memop);
6502 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
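             /* The 8/16-bit helpers operate on the whole 32-bit value; for
              * the scalar forms only the low element is significant, so mask
              * back down to the element size before writing the scalar result.
              */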
6503 if (scalar) {
6504 switch (size) {
6505 case 0:
6506 tcg_gen_ext8u_i32(tcg_op, tcg_op);
6507 break;
6508 case 1:
6509 tcg_gen_ext16u_i32(tcg_op, tcg_op);
6510 break;
6511 case 2:
6512 break;
6513 default:
6514 g_assert_not_reached();
6516 write_fp_sreg(s, rd, tcg_op);
6517 } else {
6518 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6521 tcg_temp_free_i32(tcg_op);
6523 tcg_temp_free_i32(tcg_shift);
6525 if (!is_q && !scalar) {
6526 clear_vec_high(s, rd);
6531 /* Common vector code for handling integer to FP conversion */
6532 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6533 int elements, int is_signed,
6534 int fracbits, int size)
6536     bool is_double = (size == 3);
6537 TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6538 TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6539 TCGv_i64 tcg_int = tcg_temp_new_i64();
6540 TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
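     /* fracbits is the number of fixed-point fraction bits: the *qto*
      * helpers scale the converted value down by 2^fracbits, so passing 0
      * gives a plain integer to FP conversion.
      */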
6541 int pass;
6543 for (pass = 0; pass < elements; pass++) {
6544 read_vec_element(s, tcg_int, rn, pass, mop);
6546 if (is_double) {
6547 TCGv_i64 tcg_double = tcg_temp_new_i64();
6548 if (is_signed) {
6549 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6550 tcg_shift, tcg_fpst);
6551 } else {
6552 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6553 tcg_shift, tcg_fpst);
6555 if (elements == 1) {
6556 write_fp_dreg(s, rd, tcg_double);
6557 } else {
6558 write_vec_element(s, tcg_double, rd, pass, MO_64);
6560 tcg_temp_free_i64(tcg_double);
6561 } else {
6562 TCGv_i32 tcg_single = tcg_temp_new_i32();
6563 if (is_signed) {
6564 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6565 tcg_shift, tcg_fpst);
6566 } else {
6567 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6568 tcg_shift, tcg_fpst);
6570 if (elements == 1) {
6571 write_fp_sreg(s, rd, tcg_single);
6572 } else {
6573 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6575 tcg_temp_free_i32(tcg_single);
6579 if (!is_double && elements == 2) {
6580 clear_vec_high(s, rd);
6583 tcg_temp_free_i64(tcg_int);
6584 tcg_temp_free_ptr(tcg_fpst);
6585 tcg_temp_free_i32(tcg_shift);
6588 /* UCVTF/SCVTF - Integer to FP conversion */
6589 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6590 bool is_q, bool is_u,
6591 int immh, int immb, int opcode,
6592 int rn, int rd)
6594 bool is_double = extract32(immh, 3, 1);
6595 int size = is_double ? MO_64 : MO_32;
6596 int elements;
6597 int immhb = immh << 3 | immb;
6598 int fracbits = (is_double ? 128 : 64) - immhb;
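     /* As for the shift-right forms, the immediate encodes
      * (2 * esize) - immhb, here interpreted as the number of fixed-point
      * fraction bits (1..esize).
      */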
6600 if (!extract32(immh, 2, 2)) {
6601 unallocated_encoding(s);
6602 return;
6605 if (is_scalar) {
6606 elements = 1;
6607 } else {
6608 elements = is_double ? 2 : is_q ? 4 : 2;
6609 if (is_double && !is_q) {
6610 unallocated_encoding(s);
6611 return;
6615 if (!fp_access_check(s)) {
6616 return;
6619 /* immh == 0 would be a failure of the decode logic */
6620 g_assert(immh);
6622 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6625 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
6626 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6627 bool is_q, bool is_u,
6628 int immh, int immb, int rn, int rd)
6630 bool is_double = extract32(immh, 3, 1);
6631 int immhb = immh << 3 | immb;
6632 int fracbits = (is_double ? 128 : 64) - immhb;
6633 int pass;
6634 TCGv_ptr tcg_fpstatus;
6635 TCGv_i32 tcg_rmode, tcg_shift;
6637 if (!extract32(immh, 2, 2)) {
6638 unallocated_encoding(s);
6639 return;
6642 if (!is_scalar && !is_q && is_double) {
6643 unallocated_encoding(s);
6644 return;
6647 if (!fp_access_check(s)) {
6648 return;
6651 assert(!(is_scalar && is_q));
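     /* FCVTZS/FCVTZU always truncate, so force the FP rounding mode to
      * round-towards-zero for the conversion; the first set_rmode call
      * leaves the previous mode in tcg_rmode so it can be restored below.
      */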
6653 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6654 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6655 tcg_fpstatus = get_fpstatus_ptr();
6656 tcg_shift = tcg_const_i32(fracbits);
6658 if (is_double) {
6659 int maxpass = is_scalar ? 1 : 2;
6661 for (pass = 0; pass < maxpass; pass++) {
6662 TCGv_i64 tcg_op = tcg_temp_new_i64();
6664 read_vec_element(s, tcg_op, rn, pass, MO_64);
6665 if (is_u) {
6666 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6667 } else {
6668 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6670 write_vec_element(s, tcg_op, rd, pass, MO_64);
6671 tcg_temp_free_i64(tcg_op);
6673 if (!is_q) {
6674 clear_vec_high(s, rd);
6676 } else {
6677 int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6678 for (pass = 0; pass < maxpass; pass++) {
6679 TCGv_i32 tcg_op = tcg_temp_new_i32();
6681 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6682 if (is_u) {
6683 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6684 } else {
6685 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6687 if (is_scalar) {
6688 write_fp_sreg(s, rd, tcg_op);
6689 } else {
6690 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6692 tcg_temp_free_i32(tcg_op);
6694 if (!is_q && !is_scalar) {
6695 clear_vec_high(s, rd);
6699 tcg_temp_free_ptr(tcg_fpstatus);
6700 tcg_temp_free_i32(tcg_shift);
6701 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6702 tcg_temp_free_i32(tcg_rmode);
6705 /* C3.6.9 AdvSIMD scalar shift by immediate
6706 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
6707 * +-----+---+-------------+------+------+--------+---+------+------+
6708 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
6709 * +-----+---+-------------+------+------+--------+---+------+------+
6711  * This is the scalar version, so it works on fixed size registers
6713 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6715 int rd = extract32(insn, 0, 5);
6716 int rn = extract32(insn, 5, 5);
6717 int opcode = extract32(insn, 11, 5);
6718 int immb = extract32(insn, 16, 3);
6719 int immh = extract32(insn, 19, 4);
6720 bool is_u = extract32(insn, 29, 1);
6722 if (immh == 0) {
6723 unallocated_encoding(s);
6724 return;
6727 switch (opcode) {
6728 case 0x08: /* SRI */
6729 if (!is_u) {
6730 unallocated_encoding(s);
6731 return;
6733 /* fall through */
6734 case 0x00: /* SSHR / USHR */
6735 case 0x02: /* SSRA / USRA */
6736 case 0x04: /* SRSHR / URSHR */
6737 case 0x06: /* SRSRA / URSRA */
6738 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6739 break;
6740 case 0x0a: /* SHL / SLI */
6741 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6742 break;
6743 case 0x1c: /* SCVTF, UCVTF */
6744 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6745 opcode, rn, rd);
6746 break;
6747 case 0x10: /* SQSHRUN, SQSHRUN2 */
6748 case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6749 if (!is_u) {
6750 unallocated_encoding(s);
6751 return;
6753 handle_vec_simd_sqshrn(s, true, false, false, true,
6754 immh, immb, opcode, rn, rd);
6755 break;
6756     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
6757 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6758 handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6759 immh, immb, opcode, rn, rd);
6760 break;
6761 case 0xc: /* SQSHLU */
6762 if (!is_u) {
6763 unallocated_encoding(s);
6764 return;
6766 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6767 break;
6768 case 0xe: /* SQSHL, UQSHL */
6769 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6770 break;
6771 case 0x1f: /* FCVTZS, FCVTZU */
6772 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6773 break;
6774 default:
6775 unallocated_encoding(s);
6776 break;
6780 /* C3.6.10 AdvSIMD scalar three different
6781 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
6782 * +-----+---+-----------+------+---+------+--------+-----+------+------+
6783 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
6784 * +-----+---+-----------+------+---+------+--------+-----+------+------+
6786 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6788 bool is_u = extract32(insn, 29, 1);
6789 int size = extract32(insn, 22, 2);
6790 int opcode = extract32(insn, 12, 4);
6791 int rm = extract32(insn, 16, 5);
6792 int rn = extract32(insn, 5, 5);
6793 int rd = extract32(insn, 0, 5);
6795 if (is_u) {
6796 unallocated_encoding(s);
6797 return;
6800 switch (opcode) {
6801 case 0x9: /* SQDMLAL, SQDMLAL2 */
6802 case 0xb: /* SQDMLSL, SQDMLSL2 */
6803 case 0xd: /* SQDMULL, SQDMULL2 */
6804 if (size == 0 || size == 3) {
6805 unallocated_encoding(s);
6806 return;
6808 break;
6809 default:
6810 unallocated_encoding(s);
6811 return;
6814 if (!fp_access_check(s)) {
6815 return;
6818 if (size == 2) {
6819 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6820 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6821 TCGv_i64 tcg_res = tcg_temp_new_i64();
6823 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6824 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6826 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6827 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
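         /* SQDMULL doubles the product; adding the product to itself with
          * the saturating-add helper gives the doubling with saturation.
          */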
6829 switch (opcode) {
6830 case 0xd: /* SQDMULL, SQDMULL2 */
6831 break;
6832 case 0xb: /* SQDMLSL, SQDMLSL2 */
6833 tcg_gen_neg_i64(tcg_res, tcg_res);
6834 /* fall through */
6835 case 0x9: /* SQDMLAL, SQDMLAL2 */
6836 read_vec_element(s, tcg_op1, rd, 0, MO_64);
6837 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6838 tcg_res, tcg_op1);
6839 break;
6840 default:
6841 g_assert_not_reached();
6844 write_fp_dreg(s, rd, tcg_res);
6846 tcg_temp_free_i64(tcg_op1);
6847 tcg_temp_free_i64(tcg_op2);
6848 tcg_temp_free_i64(tcg_res);
6849 } else {
6850 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6851 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6852 TCGv_i64 tcg_res = tcg_temp_new_i64();
6854 read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6855 read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6857 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6858 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6860 switch (opcode) {
6861 case 0xd: /* SQDMULL, SQDMULL2 */
6862 break;
6863 case 0xb: /* SQDMLSL, SQDMLSL2 */
6864 gen_helper_neon_negl_u32(tcg_res, tcg_res);
6865 /* fall through */
6866 case 0x9: /* SQDMLAL, SQDMLAL2 */
6868 TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6869 read_vec_element(s, tcg_op3, rd, 0, MO_32);
6870 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6871 tcg_res, tcg_op3);
6872 tcg_temp_free_i64(tcg_op3);
6873 break;
6875 default:
6876 g_assert_not_reached();
6879 tcg_gen_ext32u_i64(tcg_res, tcg_res);
6880 write_fp_dreg(s, rd, tcg_res);
6882 tcg_temp_free_i32(tcg_op1);
6883 tcg_temp_free_i32(tcg_op2);
6884 tcg_temp_free_i64(tcg_res);
6888 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6889 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6891 /* Handle 64x64->64 opcodes which are shared between the scalar
6892 * and vector 3-same groups. We cover every opcode where size == 3
6893 * is valid in either the three-reg-same (integer, not pairwise)
6894 * or scalar-three-reg-same groups. (Some opcodes are not yet
6895 * implemented.)
6897 TCGCond cond;
6899 switch (opcode) {
6900 case 0x1: /* SQADD */
6901 if (u) {
6902 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6903 } else {
6904 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6906 break;
6907 case 0x5: /* SQSUB */
6908 if (u) {
6909 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6910 } else {
6911 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6913 break;
6914 case 0x6: /* CMGT, CMHI */
6915 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6916 * We implement this using setcond (test) and then negating.
6918 cond = u ? TCG_COND_GTU : TCG_COND_GT;
6919 do_cmop:
6920 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
6921 tcg_gen_neg_i64(tcg_rd, tcg_rd);
6922 break;
6923 case 0x7: /* CMGE, CMHS */
6924 cond = u ? TCG_COND_GEU : TCG_COND_GE;
6925 goto do_cmop;
6926 case 0x11: /* CMTST, CMEQ */
6927 if (u) {
6928 cond = TCG_COND_EQ;
6929 goto do_cmop;
6931         /* CMTST: test is "if ((X & Y) != 0)". */
6932 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
6933 tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
6934 tcg_gen_neg_i64(tcg_rd, tcg_rd);
6935 break;
6936 case 0x8: /* SSHL, USHL */
6937 if (u) {
6938 gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
6939 } else {
6940 gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
6942 break;
6943 case 0x9: /* SQSHL, UQSHL */
6944 if (u) {
6945 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6946 } else {
6947 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6949 break;
6950 case 0xa: /* SRSHL, URSHL */
6951 if (u) {
6952 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
6953 } else {
6954 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
6956 break;
6957 case 0xb: /* SQRSHL, UQRSHL */
6958 if (u) {
6959 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6960 } else {
6961 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6963 break;
6964 case 0x10: /* ADD, SUB */
6965 if (u) {
6966 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
6967 } else {
6968 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
6970 break;
6971 default:
6972 g_assert_not_reached();
6976 /* Handle the 3-same-operands float operations; shared by the scalar
6977 * and vector encodings. The caller must filter out any encodings
6978 * not allocated for the encoding it is dealing with.
6980 static void handle_3same_float(DisasContext *s, int size, int elements,
6981 int fpopcode, int rd, int rn, int rm)
6983 int pass;
6984 TCGv_ptr fpst = get_fpstatus_ptr();
6986 for (pass = 0; pass < elements; pass++) {
6987 if (size) {
6988 /* Double */
6989 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6990 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6991 TCGv_i64 tcg_res = tcg_temp_new_i64();
6993 read_vec_element(s, tcg_op1, rn, pass, MO_64);
6994 read_vec_element(s, tcg_op2, rm, pass, MO_64);
6996 switch (fpopcode) {
6997 case 0x39: /* FMLS */
6998 /* As usual for ARM, separate negation for fused multiply-add */
6999 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7000 /* fall through */
7001 case 0x19: /* FMLA */
7002 read_vec_element(s, tcg_res, rd, pass, MO_64);
7003 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7004 tcg_res, fpst);
7005 break;
7006 case 0x18: /* FMAXNM */
7007 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7008 break;
7009 case 0x1a: /* FADD */
7010 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7011 break;
7012 case 0x1b: /* FMULX */
7013 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7014 break;
7015 case 0x1c: /* FCMEQ */
7016 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7017 break;
7018 case 0x1e: /* FMAX */
7019 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7020 break;
7021 case 0x1f: /* FRECPS */
7022 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7023 break;
7024 case 0x38: /* FMINNM */
7025 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7026 break;
7027 case 0x3a: /* FSUB */
7028 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7029 break;
7030 case 0x3e: /* FMIN */
7031 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7032 break;
7033 case 0x3f: /* FRSQRTS */
7034 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7035 break;
7036 case 0x5b: /* FMUL */
7037 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7038 break;
7039 case 0x5c: /* FCMGE */
7040 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7041 break;
7042 case 0x5d: /* FACGE */
7043 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7044 break;
7045 case 0x5f: /* FDIV */
7046 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7047 break;
7048 case 0x7a: /* FABD */
7049 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7050 gen_helper_vfp_absd(tcg_res, tcg_res);
7051 break;
7052 case 0x7c: /* FCMGT */
7053 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7054 break;
7055 case 0x7d: /* FACGT */
7056 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7057 break;
7058 default:
7059 g_assert_not_reached();
7062 write_vec_element(s, tcg_res, rd, pass, MO_64);
7064 tcg_temp_free_i64(tcg_res);
7065 tcg_temp_free_i64(tcg_op1);
7066 tcg_temp_free_i64(tcg_op2);
7067 } else {
7068 /* Single */
7069 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7070 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7071 TCGv_i32 tcg_res = tcg_temp_new_i32();
7073 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7074 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7076 switch (fpopcode) {
7077 case 0x39: /* FMLS */
7078 /* As usual for ARM, separate negation for fused multiply-add */
7079 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7080 /* fall through */
7081 case 0x19: /* FMLA */
7082 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7083 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7084 tcg_res, fpst);
7085 break;
7086 case 0x1a: /* FADD */
7087 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7088 break;
7089 case 0x1b: /* FMULX */
7090 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7091 break;
7092 case 0x1c: /* FCMEQ */
7093 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7094 break;
7095 case 0x1e: /* FMAX */
7096 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7097 break;
7098 case 0x1f: /* FRECPS */
7099 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7100 break;
7101 case 0x18: /* FMAXNM */
7102 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7103 break;
7104 case 0x38: /* FMINNM */
7105 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7106 break;
7107 case 0x3a: /* FSUB */
7108 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7109 break;
7110 case 0x3e: /* FMIN */
7111 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7112 break;
7113 case 0x3f: /* FRSQRTS */
7114 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7115 break;
7116 case 0x5b: /* FMUL */
7117 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7118 break;
7119 case 0x5c: /* FCMGE */
7120 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7121 break;
7122 case 0x5d: /* FACGE */
7123 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7124 break;
7125 case 0x5f: /* FDIV */
7126 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7127 break;
7128 case 0x7a: /* FABD */
7129 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7130 gen_helper_vfp_abss(tcg_res, tcg_res);
7131 break;
7132 case 0x7c: /* FCMGT */
7133 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7134 break;
7135 case 0x7d: /* FACGT */
7136 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7137 break;
7138 default:
7139 g_assert_not_reached();
7142 if (elements == 1) {
7143 /* scalar single so clear high part */
7144 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7146 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7147 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7148 tcg_temp_free_i64(tcg_tmp);
7149 } else {
7150 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7153 tcg_temp_free_i32(tcg_res);
7154 tcg_temp_free_i32(tcg_op1);
7155 tcg_temp_free_i32(tcg_op2);
7159 tcg_temp_free_ptr(fpst);
7161 if ((elements << size) < 4) {
7162 /* scalar, or non-quad vector op */
7163 clear_vec_high(s, rd);
7167 /* C3.6.11 AdvSIMD scalar three same
7168 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
7169 * +-----+---+-----------+------+---+------+--------+---+------+------+
7170 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
7171 * +-----+---+-----------+------+---+------+--------+---+------+------+
7173 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7175 int rd = extract32(insn, 0, 5);
7176 int rn = extract32(insn, 5, 5);
7177 int opcode = extract32(insn, 11, 5);
7178 int rm = extract32(insn, 16, 5);
7179 int size = extract32(insn, 22, 2);
7180 bool u = extract32(insn, 29, 1);
7181 TCGv_i64 tcg_rd;
7183 if (opcode >= 0x18) {
7184 /* Floating point: U, size[1] and opcode indicate operation */
7185 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7186 switch (fpopcode) {
7187 case 0x1b: /* FMULX */
7188 case 0x1f: /* FRECPS */
7189 case 0x3f: /* FRSQRTS */
7190 case 0x5d: /* FACGE */
7191 case 0x7d: /* FACGT */
7192 case 0x1c: /* FCMEQ */
7193 case 0x5c: /* FCMGE */
7194 case 0x7c: /* FCMGT */
7195 case 0x7a: /* FABD */
7196 break;
7197 default:
7198 unallocated_encoding(s);
7199 return;
7202 if (!fp_access_check(s)) {
7203 return;
7206 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7207 return;
7210 switch (opcode) {
7211 case 0x1: /* SQADD, UQADD */
7212 case 0x5: /* SQSUB, UQSUB */
7213 case 0x9: /* SQSHL, UQSHL */
7214 case 0xb: /* SQRSHL, UQRSHL */
7215 break;
7216 case 0x8: /* SSHL, USHL */
7217 case 0xa: /* SRSHL, URSHL */
7218 case 0x6: /* CMGT, CMHI */
7219 case 0x7: /* CMGE, CMHS */
7220 case 0x11: /* CMTST, CMEQ */
7221 case 0x10: /* ADD, SUB (vector) */
7222 if (size != 3) {
7223 unallocated_encoding(s);
7224 return;
7226 break;
7227 case 0x16: /* SQDMULH, SQRDMULH (vector) */
7228 if (size != 1 && size != 2) {
7229 unallocated_encoding(s);
7230 return;
7232 break;
7233 default:
7234 unallocated_encoding(s);
7235 return;
7238 if (!fp_access_check(s)) {
7239 return;
7242 tcg_rd = tcg_temp_new_i64();
7244 if (size == 3) {
7245 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7246 TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7248 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7249 tcg_temp_free_i64(tcg_rn);
7250 tcg_temp_free_i64(tcg_rm);
7251 } else {
7252 /* Do a single operation on the lowest element in the vector.
7253 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7254 * no side effects for all these operations.
7255 * OPTME: special-purpose helpers would avoid doing some
7256 * unnecessary work in the helper for the 8 and 16 bit cases.
7258 NeonGenTwoOpEnvFn *genenvfn;
7259 TCGv_i32 tcg_rn = tcg_temp_new_i32();
7260 TCGv_i32 tcg_rm = tcg_temp_new_i32();
7261 TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7263 read_vec_element_i32(s, tcg_rn, rn, 0, size);
7264 read_vec_element_i32(s, tcg_rm, rm, 0, size);
7266 switch (opcode) {
7267 case 0x1: /* SQADD, UQADD */
7269 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7270 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7271 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7272 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7274 genenvfn = fns[size][u];
7275 break;
7277 case 0x5: /* SQSUB, UQSUB */
7279 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7280 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7281 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7282 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7284 genenvfn = fns[size][u];
7285 break;
7287 case 0x9: /* SQSHL, UQSHL */
7289 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7290 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7291 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7292 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7294 genenvfn = fns[size][u];
7295 break;
7297 case 0xb: /* SQRSHL, UQRSHL */
7299 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7300 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7301 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7302 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7304 genenvfn = fns[size][u];
7305 break;
7307 case 0x16: /* SQDMULH, SQRDMULH */
7309 static NeonGenTwoOpEnvFn * const fns[2][2] = {
7310 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7311 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7313 assert(size == 1 || size == 2);
7314 genenvfn = fns[size - 1][u];
7315 break;
7317 default:
7318 g_assert_not_reached();
7321 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7322 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7323 tcg_temp_free_i32(tcg_rd32);
7324 tcg_temp_free_i32(tcg_rn);
7325 tcg_temp_free_i32(tcg_rm);
7328 write_fp_dreg(s, rd, tcg_rd);
7330 tcg_temp_free_i64(tcg_rd);
7333 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7334 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7335 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7337 /* Handle 64->64 opcodes which are shared between the scalar and
7338 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7339 * is valid in either group and also the double-precision fp ops.
7340 * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7341 * requires them.
7343 TCGCond cond;
7345 switch (opcode) {
7346 case 0x4: /* CLS, CLZ */
7347 if (u) {
7348 gen_helper_clz64(tcg_rd, tcg_rn);
7349 } else {
7350 gen_helper_cls64(tcg_rd, tcg_rn);
7352 break;
7353 case 0x5: /* NOT */
7354 /* This opcode is shared with CNT and RBIT but we have earlier
7355 * enforced that size == 3 if and only if this is the NOT insn.
7357 tcg_gen_not_i64(tcg_rd, tcg_rn);
7358 break;
7359 case 0x7: /* SQABS, SQNEG */
7360 if (u) {
7361 gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7362 } else {
7363 gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7365 break;
7366 case 0xa: /* CMLT */
7367 /* 64 bit integer comparison against zero, result is
7368      * test ? (2^64 - 1) : 0. We implement this using setcond(test)
7369      * and then negating the result.
7371 cond = TCG_COND_LT;
7372 do_cmop:
7373 tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7374 tcg_gen_neg_i64(tcg_rd, tcg_rd);
7375 break;
7376 case 0x8: /* CMGT, CMGE */
7377 cond = u ? TCG_COND_GE : TCG_COND_GT;
7378 goto do_cmop;
7379 case 0x9: /* CMEQ, CMLE */
7380 cond = u ? TCG_COND_LE : TCG_COND_EQ;
7381 goto do_cmop;
7382 case 0xb: /* ABS, NEG */
7383 if (u) {
7384 tcg_gen_neg_i64(tcg_rd, tcg_rn);
7385 } else {
7386 TCGv_i64 tcg_zero = tcg_const_i64(0);
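             /* ABS: negate, then use movcond to pick the original value
              * when Rn > 0 and the negated value otherwise.
              */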
7387 tcg_gen_neg_i64(tcg_rd, tcg_rn);
7388 tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7389 tcg_rn, tcg_rd);
7390 tcg_temp_free_i64(tcg_zero);
7392 break;
7393 case 0x2f: /* FABS */
7394 gen_helper_vfp_absd(tcg_rd, tcg_rn);
7395 break;
7396 case 0x6f: /* FNEG */
7397 gen_helper_vfp_negd(tcg_rd, tcg_rn);
7398 break;
7399 case 0x7f: /* FSQRT */
7400 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7401 break;
7402 case 0x1a: /* FCVTNS */
7403 case 0x1b: /* FCVTMS */
7404 case 0x1c: /* FCVTAS */
7405 case 0x3a: /* FCVTPS */
7406 case 0x3b: /* FCVTZS */
7408 TCGv_i32 tcg_shift = tcg_const_i32(0);
7409 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7410 tcg_temp_free_i32(tcg_shift);
7411 break;
7413 case 0x5a: /* FCVTNU */
7414 case 0x5b: /* FCVTMU */
7415 case 0x5c: /* FCVTAU */
7416 case 0x7a: /* FCVTPU */
7417 case 0x7b: /* FCVTZU */
7419 TCGv_i32 tcg_shift = tcg_const_i32(0);
7420 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7421 tcg_temp_free_i32(tcg_shift);
7422 break;
7424 case 0x18: /* FRINTN */
7425 case 0x19: /* FRINTM */
7426 case 0x38: /* FRINTP */
7427 case 0x39: /* FRINTZ */
7428 case 0x58: /* FRINTA */
7429 case 0x79: /* FRINTI */
7430 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7431 break;
7432 case 0x59: /* FRINTX */
7433 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7434 break;
7435 default:
7436 g_assert_not_reached();
7440 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7441 bool is_scalar, bool is_u, bool is_q,
7442 int size, int rn, int rd)
7444 bool is_double = (size == 3);
7445 TCGv_ptr fpst;
7447 if (!fp_access_check(s)) {
7448 return;
7451 fpst = get_fpstatus_ptr();
7453 if (is_double) {
7454 TCGv_i64 tcg_op = tcg_temp_new_i64();
7455 TCGv_i64 tcg_zero = tcg_const_i64(0);
7456 TCGv_i64 tcg_res = tcg_temp_new_i64();
7457 NeonGenTwoDoubleOPFn *genfn;
7458 bool swap = false;
7459 int pass;
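         /* FCMLT (zero) and FCMLE (zero) are computed as FCMGT/FCMGE with
          * the operands swapped (x < 0 iff 0 > x).
          */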
7461 switch (opcode) {
7462 case 0x2e: /* FCMLT (zero) */
7463 swap = true;
7464 /* fallthrough */
7465 case 0x2c: /* FCMGT (zero) */
7466 genfn = gen_helper_neon_cgt_f64;
7467 break;
7468 case 0x2d: /* FCMEQ (zero) */
7469 genfn = gen_helper_neon_ceq_f64;
7470 break;
7471 case 0x6d: /* FCMLE (zero) */
7472 swap = true;
7473 /* fall through */
7474 case 0x6c: /* FCMGE (zero) */
7475 genfn = gen_helper_neon_cge_f64;
7476 break;
7477 default:
7478 g_assert_not_reached();
7481 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7482 read_vec_element(s, tcg_op, rn, pass, MO_64);
7483 if (swap) {
7484 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7485 } else {
7486 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7488 write_vec_element(s, tcg_res, rd, pass, MO_64);
7490 if (is_scalar) {
7491 clear_vec_high(s, rd);
7494 tcg_temp_free_i64(tcg_res);
7495 tcg_temp_free_i64(tcg_zero);
7496 tcg_temp_free_i64(tcg_op);
7497 } else {
7498 TCGv_i32 tcg_op = tcg_temp_new_i32();
7499 TCGv_i32 tcg_zero = tcg_const_i32(0);
7500 TCGv_i32 tcg_res = tcg_temp_new_i32();
7501 NeonGenTwoSingleOPFn *genfn;
7502 bool swap = false;
7503 int pass, maxpasses;
7505 switch (opcode) {
7506 case 0x2e: /* FCMLT (zero) */
7507 swap = true;
7508 /* fall through */
7509 case 0x2c: /* FCMGT (zero) */
7510 genfn = gen_helper_neon_cgt_f32;
7511 break;
7512 case 0x2d: /* FCMEQ (zero) */
7513 genfn = gen_helper_neon_ceq_f32;
7514 break;
7515 case 0x6d: /* FCMLE (zero) */
7516 swap = true;
7517 /* fall through */
7518 case 0x6c: /* FCMGE (zero) */
7519 genfn = gen_helper_neon_cge_f32;
7520 break;
7521 default:
7522 g_assert_not_reached();
7525 if (is_scalar) {
7526 maxpasses = 1;
7527 } else {
7528 maxpasses = is_q ? 4 : 2;
7531 for (pass = 0; pass < maxpasses; pass++) {
7532 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7533 if (swap) {
7534 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7535 } else {
7536 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7538 if (is_scalar) {
7539 write_fp_sreg(s, rd, tcg_res);
7540 } else {
7541 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7544 tcg_temp_free_i32(tcg_res);
7545 tcg_temp_free_i32(tcg_zero);
7546 tcg_temp_free_i32(tcg_op);
7547 if (!is_q && !is_scalar) {
7548 clear_vec_high(s, rd);
7552 tcg_temp_free_ptr(fpst);
7555 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7556 bool is_scalar, bool is_u, bool is_q,
7557 int size, int rn, int rd)
7559 bool is_double = (size == 3);
7560 TCGv_ptr fpst = get_fpstatus_ptr();
7562 if (is_double) {
7563 TCGv_i64 tcg_op = tcg_temp_new_i64();
7564 TCGv_i64 tcg_res = tcg_temp_new_i64();
7565 int pass;
7567 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7568 read_vec_element(s, tcg_op, rn, pass, MO_64);
7569 switch (opcode) {
7570 case 0x3d: /* FRECPE */
7571 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7572 break;
7573 case 0x3f: /* FRECPX */
7574 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7575 break;
7576 case 0x7d: /* FRSQRTE */
7577 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7578 break;
7579 default:
7580 g_assert_not_reached();
7582 write_vec_element(s, tcg_res, rd, pass, MO_64);
7584 if (is_scalar) {
7585 clear_vec_high(s, rd);
7588 tcg_temp_free_i64(tcg_res);
7589 tcg_temp_free_i64(tcg_op);
7590 } else {
7591 TCGv_i32 tcg_op = tcg_temp_new_i32();
7592 TCGv_i32 tcg_res = tcg_temp_new_i32();
7593 int pass, maxpasses;
7595 if (is_scalar) {
7596 maxpasses = 1;
7597 } else {
7598 maxpasses = is_q ? 4 : 2;
7601 for (pass = 0; pass < maxpasses; pass++) {
7602 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7604 switch (opcode) {
7605 case 0x3c: /* URECPE */
7606 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7607 break;
7608 case 0x3d: /* FRECPE */
7609 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7610 break;
7611 case 0x3f: /* FRECPX */
7612 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7613 break;
7614 case 0x7d: /* FRSQRTE */
7615 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7616 break;
7617 default:
7618 g_assert_not_reached();
7621 if (is_scalar) {
7622 write_fp_sreg(s, rd, tcg_res);
7623 } else {
7624 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7627 tcg_temp_free_i32(tcg_res);
7628 tcg_temp_free_i32(tcg_op);
7629 if (!is_q && !is_scalar) {
7630 clear_vec_high(s, rd);
7633 tcg_temp_free_ptr(fpst);
7636 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7637 int opcode, bool u, bool is_q,
7638 int size, int rn, int rd)
7640 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7641 * in the source becomes a size element in the destination).
7643 int pass;
7644 TCGv_i32 tcg_res[2];
7645 int destelt = is_q ? 2 : 0;
7646 int passes = scalar ? 1 : 2;
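     /* Each pass narrows one 64-bit source element into a 32-bit result;
      * destelt selects which half of Rd they land in: the "2" forms (is_q)
      * fill the high 64 bits and leave the low half alone, otherwise the
      * low half is written and the top half cleared afterwards.
      */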
7648 if (scalar) {
7649 tcg_res[1] = tcg_const_i32(0);
7652 for (pass = 0; pass < passes; pass++) {
7653 TCGv_i64 tcg_op = tcg_temp_new_i64();
7654 NeonGenNarrowFn *genfn = NULL;
7655 NeonGenNarrowEnvFn *genenvfn = NULL;
7657 if (scalar) {
7658 read_vec_element(s, tcg_op, rn, pass, size + 1);
7659 } else {
7660 read_vec_element(s, tcg_op, rn, pass, MO_64);
7662 tcg_res[pass] = tcg_temp_new_i32();
7664 switch (opcode) {
7665 case 0x12: /* XTN, SQXTUN */
7667 static NeonGenNarrowFn * const xtnfns[3] = {
7668 gen_helper_neon_narrow_u8,
7669 gen_helper_neon_narrow_u16,
7670 tcg_gen_extrl_i64_i32,
7672 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7673 gen_helper_neon_unarrow_sat8,
7674 gen_helper_neon_unarrow_sat16,
7675 gen_helper_neon_unarrow_sat32,
7677 if (u) {
7678 genenvfn = sqxtunfns[size];
7679 } else {
7680 genfn = xtnfns[size];
7682 break;
7684 case 0x14: /* SQXTN, UQXTN */
7686 static NeonGenNarrowEnvFn * const fns[3][2] = {
7687 { gen_helper_neon_narrow_sat_s8,
7688 gen_helper_neon_narrow_sat_u8 },
7689 { gen_helper_neon_narrow_sat_s16,
7690 gen_helper_neon_narrow_sat_u16 },
7691 { gen_helper_neon_narrow_sat_s32,
7692 gen_helper_neon_narrow_sat_u32 },
7694 genenvfn = fns[size][u];
7695 break;
7697 case 0x16: /* FCVTN, FCVTN2 */
7698 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7699 if (size == 2) {
7700 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7701 } else {
7702 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7703 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7704 tcg_gen_extrl_i64_i32(tcg_lo, tcg_op);
7705 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7706 tcg_gen_shri_i64(tcg_op, tcg_op, 32);
7707 tcg_gen_extrl_i64_i32(tcg_hi, tcg_op);
7708 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7709 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7710 tcg_temp_free_i32(tcg_lo);
7711 tcg_temp_free_i32(tcg_hi);
7713 break;
7714 case 0x56: /* FCVTXN, FCVTXN2 */
7715 /* 64 bit to 32 bit float conversion
7716 * with von Neumann rounding (round to odd)
7718 assert(size == 2);
7719 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7720 break;
7721 default:
7722 g_assert_not_reached();
7725 if (genfn) {
7726 genfn(tcg_res[pass], tcg_op);
7727 } else if (genenvfn) {
7728 genenvfn(tcg_res[pass], cpu_env, tcg_op);
7731 tcg_temp_free_i64(tcg_op);
7734 for (pass = 0; pass < 2; pass++) {
7735 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7736 tcg_temp_free_i32(tcg_res[pass]);
7738 if (!is_q) {
7739 clear_vec_high(s, rd);
7743 /* Remaining saturating accumulating ops */
7744 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7745 bool is_q, int size, int rn, int rd)
7747 bool is_double = (size == 3);
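     /* SUQADD and USQADD are unusual in that Rd is both an accumulator
      * input and the destination, so both Rn and Rd are read below.
      */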
7749 if (is_double) {
7750 TCGv_i64 tcg_rn = tcg_temp_new_i64();
7751 TCGv_i64 tcg_rd = tcg_temp_new_i64();
7752 int pass;
7754 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7755 read_vec_element(s, tcg_rn, rn, pass, MO_64);
7756 read_vec_element(s, tcg_rd, rd, pass, MO_64);
7758 if (is_u) { /* USQADD */
7759 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7760 } else { /* SUQADD */
7761 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7763 write_vec_element(s, tcg_rd, rd, pass, MO_64);
7765 if (is_scalar) {
7766 clear_vec_high(s, rd);
7769 tcg_temp_free_i64(tcg_rd);
7770 tcg_temp_free_i64(tcg_rn);
7771 } else {
7772 TCGv_i32 tcg_rn = tcg_temp_new_i32();
7773 TCGv_i32 tcg_rd = tcg_temp_new_i32();
7774 int pass, maxpasses;
7776 if (is_scalar) {
7777 maxpasses = 1;
7778 } else {
7779 maxpasses = is_q ? 4 : 2;
7782 for (pass = 0; pass < maxpasses; pass++) {
7783 if (is_scalar) {
7784 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7785 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7786 } else {
7787 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7788 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7791 if (is_u) { /* USQADD */
7792 switch (size) {
7793 case 0:
7794 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7795 break;
7796 case 1:
7797 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7798 break;
7799 case 2:
7800 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7801 break;
7802 default:
7803 g_assert_not_reached();
7805 } else { /* SUQADD */
7806 switch (size) {
7807 case 0:
7808 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7809 break;
7810 case 1:
7811 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7812 break;
7813 case 2:
7814 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7815 break;
7816 default:
7817 g_assert_not_reached();
7821 if (is_scalar) {
7822 TCGv_i64 tcg_zero = tcg_const_i64(0);
7823 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7824 tcg_temp_free_i64(tcg_zero);
7826 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7829 if (!is_q) {
7830 clear_vec_high(s, rd);
7833 tcg_temp_free_i32(tcg_rd);
7834 tcg_temp_free_i32(tcg_rn);
7838 /* C3.6.12 AdvSIMD scalar two reg misc
7839 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7840 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7841 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
7842 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7844 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7846 int rd = extract32(insn, 0, 5);
7847 int rn = extract32(insn, 5, 5);
7848 int opcode = extract32(insn, 12, 5);
7849 int size = extract32(insn, 22, 2);
7850 bool u = extract32(insn, 29, 1);
7851 bool is_fcvt = false;
7852 int rmode;
7853 TCGv_i32 tcg_rmode;
7854 TCGv_ptr tcg_fpstatus;
7856 switch (opcode) {
7857     case 0x3: /* USQADD / SUQADD */
7858 if (!fp_access_check(s)) {
7859 return;
7861 handle_2misc_satacc(s, true, u, false, size, rn, rd);
7862 return;
7863 case 0x7: /* SQABS / SQNEG */
7864 break;
7865 case 0xa: /* CMLT */
7866 if (u) {
7867 unallocated_encoding(s);
7868 return;
7870 /* fall through */
7871 case 0x8: /* CMGT, CMGE */
7872 case 0x9: /* CMEQ, CMLE */
7873 case 0xb: /* ABS, NEG */
7874 if (size != 3) {
7875 unallocated_encoding(s);
7876 return;
7878 break;
7879 case 0x12: /* SQXTUN */
7880 if (!u) {
7881 unallocated_encoding(s);
7882 return;
7884 /* fall through */
7885 case 0x14: /* SQXTN, UQXTN */
7886 if (size == 3) {
7887 unallocated_encoding(s);
7888 return;
7890 if (!fp_access_check(s)) {
7891 return;
7893 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7894 return;
7895 case 0xc ... 0xf:
7896 case 0x16 ... 0x1d:
7897 case 0x1f:
7898 /* Floating point: U, size[1] and opcode indicate operation;
7899 * size[0] indicates single or double precision.
7901 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7902 size = extract32(size, 0, 1) ? 3 : 2;
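         /* opcode is now a 7-bit value of the form U:size<1>:opcode<4:0>,
          * which is what the case labels below are written against; size is
          * reduced to 3 (double) or 2 (single).
          */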
7903 switch (opcode) {
7904 case 0x2c: /* FCMGT (zero) */
7905 case 0x2d: /* FCMEQ (zero) */
7906 case 0x2e: /* FCMLT (zero) */
7907 case 0x6c: /* FCMGE (zero) */
7908 case 0x6d: /* FCMLE (zero) */
7909 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7910 return;
7911 case 0x1d: /* SCVTF */
7912 case 0x5d: /* UCVTF */
7914 bool is_signed = (opcode == 0x1d);
7915 if (!fp_access_check(s)) {
7916 return;
7918 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7919 return;
7921 case 0x3d: /* FRECPE */
7922 case 0x3f: /* FRECPX */
7923 case 0x7d: /* FRSQRTE */
7924 if (!fp_access_check(s)) {
7925 return;
7927 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7928 return;
7929 case 0x1a: /* FCVTNS */
7930 case 0x1b: /* FCVTMS */
7931 case 0x3a: /* FCVTPS */
7932 case 0x3b: /* FCVTZS */
7933 case 0x5a: /* FCVTNU */
7934 case 0x5b: /* FCVTMU */
7935 case 0x7a: /* FCVTPU */
7936 case 0x7b: /* FCVTZU */
7937 is_fcvt = true;
7938 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
7939 break;
7940 case 0x1c: /* FCVTAS */
7941 case 0x5c: /* FCVTAU */
7942 /* TIEAWAY doesn't fit in the usual rounding mode encoding */
7943 is_fcvt = true;
7944 rmode = FPROUNDING_TIEAWAY;
7945 break;
7946 case 0x56: /* FCVTXN, FCVTXN2 */
7947 if (size == 2) {
7948 unallocated_encoding(s);
7949 return;
7951 if (!fp_access_check(s)) {
7952 return;
7954 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
7955 return;
7956 default:
7957 unallocated_encoding(s);
7958 return;
7960 break;
7961 default:
7962 unallocated_encoding(s);
7963 return;
7966 if (!fp_access_check(s)) {
7967 return;
7970 if (is_fcvt) {
7971 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
7972 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
7973 tcg_fpstatus = get_fpstatus_ptr();
7974 } else {
7975 TCGV_UNUSED_I32(tcg_rmode);
7976 TCGV_UNUSED_PTR(tcg_fpstatus);
7979 if (size == 3) {
7980 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7981 TCGv_i64 tcg_rd = tcg_temp_new_i64();
7983 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
7984 write_fp_dreg(s, rd, tcg_rd);
7985 tcg_temp_free_i64(tcg_rd);
7986 tcg_temp_free_i64(tcg_rn);
7987 } else {
7988 TCGv_i32 tcg_rn = tcg_temp_new_i32();
7989 TCGv_i32 tcg_rd = tcg_temp_new_i32();
7991 read_vec_element_i32(s, tcg_rn, rn, 0, size);
7993 switch (opcode) {
7994 case 0x7: /* SQABS, SQNEG */
7996 NeonGenOneOpEnvFn *genfn;
7997 static NeonGenOneOpEnvFn * const fns[3][2] = {
7998 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
7999 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8000 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8002 genfn = fns[size][u];
8003 genfn(tcg_rd, cpu_env, tcg_rn);
8004 break;
8006 case 0x1a: /* FCVTNS */
8007 case 0x1b: /* FCVTMS */
8008 case 0x1c: /* FCVTAS */
8009 case 0x3a: /* FCVTPS */
8010 case 0x3b: /* FCVTZS */
8012 TCGv_i32 tcg_shift = tcg_const_i32(0);
8013 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8014 tcg_temp_free_i32(tcg_shift);
8015 break;
8017 case 0x5a: /* FCVTNU */
8018 case 0x5b: /* FCVTMU */
8019 case 0x5c: /* FCVTAU */
8020 case 0x7a: /* FCVTPU */
8021 case 0x7b: /* FCVTZU */
8023 TCGv_i32 tcg_shift = tcg_const_i32(0);
8024 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8025 tcg_temp_free_i32(tcg_shift);
8026 break;
8028 default:
8029 g_assert_not_reached();
8032 write_fp_sreg(s, rd, tcg_rd);
8033 tcg_temp_free_i32(tcg_rd);
8034 tcg_temp_free_i32(tcg_rn);
8037 if (is_fcvt) {
8038 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8039 tcg_temp_free_i32(tcg_rmode);
8040 tcg_temp_free_ptr(tcg_fpstatus);
8044 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8045 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8046 int immh, int immb, int opcode, int rn, int rd)
8048 int size = 32 - clz32(immh) - 1;
8049 int immhb = immh << 3 | immb;
8050 int shift = 2 * (8 << size) - immhb;
8051 bool accumulate = false;
8052 bool round = false;
8053 bool insert = false;
8054 int dsize = is_q ? 128 : 64;
8055 int esize = 8 << size;
8056 int elements = dsize/esize;
8057 TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8058 TCGv_i64 tcg_rn = new_tmp_a64(s);
8059 TCGv_i64 tcg_rd = new_tmp_a64(s);
8060 TCGv_i64 tcg_round;
8061 int i;
8063 if (extract32(immh, 3, 1) && !is_q) {
8064 unallocated_encoding(s);
8065 return;
8068 if (size > 3 && !is_q) {
8069 unallocated_encoding(s);
8070 return;
8073 if (!fp_access_check(s)) {
8074 return;
8077 switch (opcode) {
8078 case 0x02: /* SSRA / USRA (accumulate) */
8079 accumulate = true;
8080 break;
8081 case 0x04: /* SRSHR / URSHR (rounding) */
8082 round = true;
8083 break;
8084 case 0x06: /* SRSRA / URSRA (accum + rounding) */
8085 accumulate = round = true;
8086 break;
8087 case 0x08: /* SRI */
8088 insert = true;
8089 break;
8092 if (round) {
8093 uint64_t round_const = 1ULL << (shift - 1);
8094 tcg_round = tcg_const_i64(round_const);
8095 } else {
8096 TCGV_UNUSED_I64(tcg_round);
8099 for (i = 0; i < elements; i++) {
8100 read_vec_element(s, tcg_rn, rn, i, memop);
8101 if (accumulate || insert) {
8102 read_vec_element(s, tcg_rd, rd, i, memop);
8105 if (insert) {
8106 handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8107 } else {
8108 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8109 accumulate, is_u, size, shift);
8112 write_vec_element(s, tcg_rd, rd, i, size);
8115 if (!is_q) {
8116 clear_vec_high(s, rd);
8119 if (round) {
8120 tcg_temp_free_i64(tcg_round);
8124 /* SHL/SLI - Vector shift left */
8125 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8126 int immh, int immb, int opcode, int rn, int rd)
8128 int size = 32 - clz32(immh) - 1;
8129 int immhb = immh << 3 | immb;
8130 int shift = immhb - (8 << size);
8131 int dsize = is_q ? 128 : 64;
8132 int esize = 8 << size;
8133 int elements = dsize/esize;
8134 TCGv_i64 tcg_rn = new_tmp_a64(s);
8135 TCGv_i64 tcg_rd = new_tmp_a64(s);
8136 int i;
8138 if (extract32(immh, 3, 1) && !is_q) {
8139 unallocated_encoding(s);
8140 return;
8143 if (size > 3 && !is_q) {
8144 unallocated_encoding(s);
8145 return;
8148 if (!fp_access_check(s)) {
8149 return;
8152 for (i = 0; i < elements; i++) {
8153 read_vec_element(s, tcg_rn, rn, i, size);
8154 if (insert) {
8155 read_vec_element(s, tcg_rd, rd, i, size);
8158 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8160 write_vec_element(s, tcg_rd, rd, i, size);
8163 if (!is_q) {
8164 clear_vec_high(s, rd);
8168 /* USHLL/SHLL - Vector shift left with widening */
8169 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8170 int immh, int immb, int opcode, int rn, int rd)
8172 int size = 32 - clz32(immh) - 1;
8173 int immhb = immh << 3 | immb;
8174 int shift = immhb - (8 << size);
8175 int dsize = 64;
8176 int esize = 8 << size;
8177 int elements = dsize/esize;
8178 TCGv_i64 tcg_rn = new_tmp_a64(s);
8179 TCGv_i64 tcg_rd = new_tmp_a64(s);
8180 int i;
8182 if (size >= 3) {
8183 unallocated_encoding(s);
8184 return;
8187 if (!fp_access_check(s)) {
8188 return;
8191 /* For the LL variants the store is larger than the load,
8192 * so if rd == rn we would overwrite parts of our input.
8193 * So load everything right now and use shifts in the main loop.
8195 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
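     /* Each iteration extracts one source element from the 64-bit half we
      * loaded, sign- or zero-extends it from the source element size (the
      * !is_u bit selects the SXT rather than UXT extension type) and shifts
      * it left into a double-width destination element.
      */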
8197 for (i = 0; i < elements; i++) {
8198 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8199 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8200 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8201 write_vec_element(s, tcg_rd, rd, i, size + 1);
8205 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8206 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8207 int immh, int immb, int opcode, int rn, int rd)
8209 int immhb = immh << 3 | immb;
8210 int size = 32 - clz32(immh) - 1;
8211 int dsize = 64;
8212 int esize = 8 << size;
8213 int elements = dsize/esize;
8214 int shift = (2 * esize) - immhb;
8215 bool round = extract32(opcode, 0, 1);
8216 TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8217 TCGv_i64 tcg_round;
8218 int i;
8220 if (extract32(immh, 3, 1)) {
8221 unallocated_encoding(s);
8222 return;
8225 if (!fp_access_check(s)) {
8226 return;
8229 tcg_rn = tcg_temp_new_i64();
8230 tcg_rd = tcg_temp_new_i64();
8231 tcg_final = tcg_temp_new_i64();
8232 read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
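     /* The narrowed elements are packed into tcg_final and written back as
      * one 64-bit element: SHRN writes the low half of Rd and clears the
      * top, while the "2" forms (is_q) fill the high half and leave the
      * low half untouched.
      */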
8234 if (round) {
8235 uint64_t round_const = 1ULL << (shift - 1);
8236 tcg_round = tcg_const_i64(round_const);
8237 } else {
8238 TCGV_UNUSED_I64(tcg_round);
8241 for (i = 0; i < elements; i++) {
8242 read_vec_element(s, tcg_rn, rn, i, size+1);
8243 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8244 false, true, size+1, shift);
8246 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8249 if (!is_q) {
8250 clear_vec_high(s, rd);
8251 write_vec_element(s, tcg_final, rd, 0, MO_64);
8252 } else {
8253 write_vec_element(s, tcg_final, rd, 1, MO_64);
8256 if (round) {
8257 tcg_temp_free_i64(tcg_round);
8259 tcg_temp_free_i64(tcg_rn);
8260 tcg_temp_free_i64(tcg_rd);
8261 tcg_temp_free_i64(tcg_final);
8262 return;
8266 /* C3.6.14 AdvSIMD shift by immediate
8267 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
8268 * +---+---+---+-------------+------+------+--------+---+------+------+
8269 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
8270 * +---+---+---+-------------+------+------+--------+---+------+------+
8272 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8274 int rd = extract32(insn, 0, 5);
8275 int rn = extract32(insn, 5, 5);
8276 int opcode = extract32(insn, 11, 5);
8277 int immb = extract32(insn, 16, 3);
8278 int immh = extract32(insn, 19, 4);
8279 bool is_u = extract32(insn, 29, 1);
8280 bool is_q = extract32(insn, 30, 1);
8282 switch (opcode) {
8283 case 0x08: /* SRI */
8284 if (!is_u) {
8285 unallocated_encoding(s);
8286 return;
8288 /* fall through */
8289 case 0x00: /* SSHR / USHR */
8290 case 0x02: /* SSRA / USRA (accumulate) */
8291 case 0x04: /* SRSHR / URSHR (rounding) */
8292 case 0x06: /* SRSRA / URSRA (accum + rounding) */
8293 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8294 break;
8295 case 0x0a: /* SHL / SLI */
8296 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8297 break;
8298     case 0x10: /* SHRN / SQSHRUN */
8299 case 0x11: /* RSHRN / SQRSHRUN */
8300 if (is_u) {
8301 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8302 opcode, rn, rd);
8303 } else {
8304 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8306 break;
8307 case 0x12: /* SQSHRN / UQSHRN */
8308 case 0x13: /* SQRSHRN / UQRSHRN */
8309 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8310 opcode, rn, rd);
8311 break;
8312 case 0x14: /* SSHLL / USHLL */
8313 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8314 break;
8315 case 0x1c: /* SCVTF / UCVTF */
8316 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8317 opcode, rn, rd);
8318 break;
8319 case 0xc: /* SQSHLU */
8320 if (!is_u) {
8321 unallocated_encoding(s);
8322 return;
8324 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8325 break;
8326 case 0xe: /* SQSHL, UQSHL */
8327 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8328 break;
8329 case 0x1f: /* FCVTZS/ FCVTZU */
8330 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8331 return;
8332 default:
8333 unallocated_encoding(s);
8334 return;
8338 /* Generate code to do a "long" addition or subtraction, ie one done in
8339 * TCGv_i64 on vector lanes twice the width specified by size.
8341 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8342 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8344 static NeonGenTwo64OpFn * const fns[3][2] = {
8345 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8346 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8347 { tcg_gen_add_i64, tcg_gen_sub_i64 },
8349 NeonGenTwo64OpFn *genfn;
8350 assert(size < 3);
8352 genfn = fns[size][is_sub];
8353 genfn(tcg_res, tcg_op1, tcg_op2);
8356 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8357 int opcode, int rd, int rn, int rm)
8359 /* 3-reg-different widening insns: 64 x 64 -> 128 */
8360 TCGv_i64 tcg_res[2];
8361 int pass, accop;
8363 tcg_res[0] = tcg_temp_new_i64();
8364 tcg_res[1] = tcg_temp_new_i64();
8366 /* Does this op do an adding accumulate, a subtracting accumulate,
8367 * or no accumulate at all?
8369 switch (opcode) {
8370 case 5:
8371 case 8:
8372 case 9:
8373 accop = 1;
8374 break;
8375 case 10:
8376 case 11:
8377 accop = -1;
8378 break;
8379 default:
8380 accop = 0;
8381 break;
8384 if (accop != 0) {
8385 read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8386 read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8389 /* size == 2 means two 32x32->64 operations; this is worth special
8390 * casing because we can generally handle it inline.
8392 if (size == 2) {
8393 for (pass = 0; pass < 2; pass++) {
8394 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8395 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8396 TCGv_i64 tcg_passres;
8397 TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8399 int elt = pass + is_q * 2;
8401 read_vec_element(s, tcg_op1, rn, elt, memop);
8402 read_vec_element(s, tcg_op2, rm, elt, memop);
8404 if (accop == 0) {
8405 tcg_passres = tcg_res[pass];
8406 } else {
8407 tcg_passres = tcg_temp_new_i64();
8410 switch (opcode) {
8411 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8412 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8413 break;
8414 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8415 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8416 break;
8417 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8418 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8420 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8421 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8423 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8424 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
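/* Absolute difference: compute both (op1 - op2) and (op2 - op1), then use
 * movcond to keep whichever is non-negative, comparing signed or unsigned
 * as appropriate.
 */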
8425 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8426 tcg_passres,
8427 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8428 tcg_temp_free_i64(tcg_tmp1);
8429 tcg_temp_free_i64(tcg_tmp2);
8430 break;
8432 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8433 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8434 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8435 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8436 break;
8437 case 9: /* SQDMLAL, SQDMLAL2 */
8438 case 11: /* SQDMLSL, SQDMLSL2 */
8439 case 13: /* SQDMULL, SQDMULL2 */
8440 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
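/* The doubling in SQDMULL/SQDMLAL/SQDMLSL is done as a saturating add of
 * the product to itself.
 */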
8441 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8442 tcg_passres, tcg_passres);
8443 break;
8444 default:
8445 g_assert_not_reached();
8448 if (opcode == 9 || opcode == 11) {
8449 /* saturating accumulate ops */
8450 if (accop < 0) {
8451 tcg_gen_neg_i64(tcg_passres, tcg_passres);
8453 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8454 tcg_res[pass], tcg_passres);
8455 } else if (accop > 0) {
8456 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8457 } else if (accop < 0) {
8458 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8461 if (accop != 0) {
8462 tcg_temp_free_i64(tcg_passres);
8465 tcg_temp_free_i64(tcg_op1);
8466 tcg_temp_free_i64(tcg_op2);
8468 } else {
8469 /* size 0 or 1, generally helper functions */
8470 for (pass = 0; pass < 2; pass++) {
8471 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8472 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8473 TCGv_i64 tcg_passres;
8474 int elt = pass + is_q * 2;
8476 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8477 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8479 if (accop == 0) {
8480 tcg_passres = tcg_res[pass];
8481 } else {
8482 tcg_passres = tcg_temp_new_i64();
8485 switch (opcode) {
8486 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8487 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8489 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8490 static NeonGenWidenFn * const widenfns[2][2] = {
8491 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8492 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8494 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8496 widenfn(tcg_op2_64, tcg_op2);
8497 widenfn(tcg_passres, tcg_op1);
8498 gen_neon_addl(size, (opcode == 2), tcg_passres,
8499 tcg_passres, tcg_op2_64);
8500 tcg_temp_free_i64(tcg_op2_64);
8501 break;
8503 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8504 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8505 if (size == 0) {
8506 if (is_u) {
8507 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8508 } else {
8509 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8511 } else {
8512 if (is_u) {
8513 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8514 } else {
8515 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8518 break;
8519 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8520 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8521 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8522 if (size == 0) {
8523 if (is_u) {
8524 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8525 } else {
8526 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8528 } else {
8529 if (is_u) {
8530 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8531 } else {
8532 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8535 break;
8536 case 9: /* SQDMLAL, SQDMLAL2 */
8537 case 11: /* SQDMLSL, SQDMLSL2 */
8538 case 13: /* SQDMULL, SQDMULL2 */
8539 assert(size == 1);
8540 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8541 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8542 tcg_passres, tcg_passres);
8543 break;
8544 case 14: /* PMULL */
8545 assert(size == 0);
8546 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8547 break;
8548 default:
8549 g_assert_not_reached();
8551 tcg_temp_free_i32(tcg_op1);
8552 tcg_temp_free_i32(tcg_op2);
8554 if (accop != 0) {
8555 if (opcode == 9 || opcode == 11) {
8556 /* saturating accumulate ops */
8557 if (accop < 0) {
8558 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8560 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8561 tcg_res[pass],
8562 tcg_passres);
8563 } else {
8564 gen_neon_addl(size, (accop < 0), tcg_res[pass],
8565 tcg_res[pass], tcg_passres);
8567 tcg_temp_free_i64(tcg_passres);
8572 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8573 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8574 tcg_temp_free_i64(tcg_res[0]);
8575 tcg_temp_free_i64(tcg_res[1]);
8578 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8579 int opcode, int rd, int rn, int rm)
8581 TCGv_i64 tcg_res[2];
8582 int part = is_q ? 2 : 0;
8583 int pass;
8585 for (pass = 0; pass < 2; pass++) {
8586 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8587 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8588 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8589 static NeonGenWidenFn * const widenfns[3][2] = {
8590 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8591 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8592 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8594 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8596 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8597 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8598 widenfn(tcg_op2_wide, tcg_op2);
8599 tcg_temp_free_i32(tcg_op2);
8600 tcg_res[pass] = tcg_temp_new_i64();
8601 gen_neon_addl(size, (opcode == 3),
8602 tcg_res[pass], tcg_op1, tcg_op2_wide);
8603 tcg_temp_free_i64(tcg_op1);
8604 tcg_temp_free_i64(tcg_op2_wide);
8607 for (pass = 0; pass < 2; pass++) {
8608 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8609 tcg_temp_free_i64(tcg_res[pass]);
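/* Narrow a 64-bit element to its high 32 bits; the _round_ variant adds
 * 2^31 first so the result is rounded rather than truncated (these feed the
 * size-2 cases of the narrowing ops below).
 */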
8613 static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
8615 tcg_gen_shri_i64(in, in, 32);
8616 tcg_gen_extrl_i64_i32(res, in);
8619 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8621 tcg_gen_addi_i64(in, in, 1U << 31);
8622 do_narrow_high_u32(res, in);
8625 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8626 int opcode, int rd, int rn, int rm)
8628 TCGv_i32 tcg_res[2];
8629 int part = is_q ? 2 : 0;
8630 int pass;
8632 for (pass = 0; pass < 2; pass++) {
8633 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8634 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8635 TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8636 static NeonGenNarrowFn * const narrowfns[3][2] = {
8637 { gen_helper_neon_narrow_high_u8,
8638 gen_helper_neon_narrow_round_high_u8 },
8639 { gen_helper_neon_narrow_high_u16,
8640 gen_helper_neon_narrow_round_high_u16 },
8641 { do_narrow_high_u32, do_narrow_round_high_u32 },
8643 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8645 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8646 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8648 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8650 tcg_temp_free_i64(tcg_op1);
8651 tcg_temp_free_i64(tcg_op2);
8653 tcg_res[pass] = tcg_temp_new_i32();
8654 gennarrow(tcg_res[pass], tcg_wideres);
8655 tcg_temp_free_i64(tcg_wideres);
8658 for (pass = 0; pass < 2; pass++) {
8659 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8660 tcg_temp_free_i32(tcg_res[pass]);
8662 if (!is_q) {
8663 clear_vec_high(s, rd);
8667 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8669 /* PMULL of 64 x 64 -> 128 is an odd special case because it
8670 * is the only three-reg-diff instruction which produces a
8671 * 128-bit wide result from a single operation. However since
8672 * it's possible to calculate the two halves more or less
8673  * separately, we just use two helper calls.
8675 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8676 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8677 TCGv_i64 tcg_res = tcg_temp_new_i64();
8679 read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8680 read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8681 gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8682 write_vec_element(s, tcg_res, rd, 0, MO_64);
8683 gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8684 write_vec_element(s, tcg_res, rd, 1, MO_64);
8686 tcg_temp_free_i64(tcg_op1);
8687 tcg_temp_free_i64(tcg_op2);
8688 tcg_temp_free_i64(tcg_res);
8691 /* C3.6.15 AdvSIMD three different
8692 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
8693 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8694 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
8695 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8697 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8699 /* Instructions in this group fall into three basic classes
8700 * (in each case with the operation working on each element in
8701 * the input vectors):
8702 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8703 * 128 bit input)
8704 * (2) wide 64 x 128 -> 128
8705 * (3) narrowing 128 x 128 -> 64
8706 * Here we do initial decode, catch unallocated cases and
8707 * dispatch to separate functions for each class.
8709 int is_q = extract32(insn, 30, 1);
8710 int is_u = extract32(insn, 29, 1);
8711 int size = extract32(insn, 22, 2);
8712 int opcode = extract32(insn, 12, 4);
8713 int rm = extract32(insn, 16, 5);
8714 int rn = extract32(insn, 5, 5);
8715 int rd = extract32(insn, 0, 5);
8717 switch (opcode) {
8718 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8719 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8720 /* 64 x 128 -> 128 */
8721 if (size == 3) {
8722 unallocated_encoding(s);
8723 return;
8725 if (!fp_access_check(s)) {
8726 return;
8728 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8729 break;
8730 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8731 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8732 /* 128 x 128 -> 64 */
8733 if (size == 3) {
8734 unallocated_encoding(s);
8735 return;
8737 if (!fp_access_check(s)) {
8738 return;
8740 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8741 break;
8742 case 14: /* PMULL, PMULL2 */
8743 if (is_u || size == 1 || size == 2) {
8744 unallocated_encoding(s);
8745 return;
8747 if (size == 3) {
8748 if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8749 unallocated_encoding(s);
8750 return;
8752 if (!fp_access_check(s)) {
8753 return;
8755 handle_pmull_64(s, is_q, rd, rn, rm);
8756 return;
8758 goto is_widening;
8759 case 9: /* SQDMLAL, SQDMLAL2 */
8760 case 11: /* SQDMLSL, SQDMLSL2 */
8761 case 13: /* SQDMULL, SQDMULL2 */
8762 if (is_u || size == 0) {
8763 unallocated_encoding(s);
8764 return;
8766 /* fall through */
8767 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8768 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8769 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8770 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8771 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8772 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8773 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8774 /* 64 x 64 -> 128 */
8775 if (size == 3) {
8776 unallocated_encoding(s);
8777 return;
8779 is_widening:
8780 if (!fp_access_check(s)) {
8781 return;
8784 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8785 break;
8786 default:
8787 /* opcode 15 not allocated */
8788 unallocated_encoding(s);
8789 break;
8793 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8794 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8796 int rd = extract32(insn, 0, 5);
8797 int rn = extract32(insn, 5, 5);
8798 int rm = extract32(insn, 16, 5);
8799 int size = extract32(insn, 22, 2);
8800 bool is_u = extract32(insn, 29, 1);
8801 bool is_q = extract32(insn, 30, 1);
8802 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8803 int pass;
8805 if (!fp_access_check(s)) {
8806 return;
8809 tcg_op1 = tcg_temp_new_i64();
8810 tcg_op2 = tcg_temp_new_i64();
8811 tcg_res[0] = tcg_temp_new_i64();
8812 tcg_res[1] = tcg_temp_new_i64();
8814 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8815 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8816 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8818 if (!is_u) {
8819 switch (size) {
8820 case 0: /* AND */
8821 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8822 break;
8823 case 1: /* BIC */
8824 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8825 break;
8826 case 2: /* ORR */
8827 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8828 break;
8829 case 3: /* ORN */
8830 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8831 break;
8833 } else {
8834 if (size != 0) {
8835 /* B* ops need res loaded to operate on */
8836 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8839 switch (size) {
8840 case 0: /* EOR */
8841 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8842 break;
8843 case 1: /* BSL bitwise select */
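/* BSL: rd = ((rn ^ rm) & rd) ^ rm, i.e. take each bit from rn where the
 * existing rd bit is set and from rm where it is clear.
 */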
8844 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8845 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8846 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8847 break;
8848 case 2: /* BIT, bitwise insert if true */
8849 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8850 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8851 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8852 break;
8853 case 3: /* BIF, bitwise insert if false */
8854 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8855 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8856 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8857 break;
8862 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
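/* For the 64-bit (non-Q) forms the high half of the destination is
 * explicitly written as zero.
 */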
8863 if (!is_q) {
8864 tcg_gen_movi_i64(tcg_res[1], 0);
8866 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8868 tcg_temp_free_i64(tcg_op1);
8869 tcg_temp_free_i64(tcg_op2);
8870 tcg_temp_free_i64(tcg_res[0]);
8871 tcg_temp_free_i64(tcg_res[1]);
8874 /* Helper functions for 32 bit comparisons */
8875 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8877 tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8880 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8882 tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8885 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8887 tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8890 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8892 tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8895 /* Pairwise op subgroup of C3.6.16.
8897 * This is called directly or via the handle_3same_float for float pairwise
8898 * operations where the opcode and size are calculated differently.
8900 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8901 int size, int rn, int rm, int rd)
8903 TCGv_ptr fpst;
8904 int pass;
8906 /* Floating point operations need fpst */
8907 if (opcode >= 0x58) {
8908 fpst = get_fpstatus_ptr();
8909 } else {
8910 TCGV_UNUSED_PTR(fpst);
8913 if (!fp_access_check(s)) {
8914 return;
8917 /* These operations work on the concatenated rm:rn, with each pair of
8918 * adjacent elements being operated on to produce an element in the result.
8920 if (size == 3) {
8921 TCGv_i64 tcg_res[2];
8923 for (pass = 0; pass < 2; pass++) {
8924 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8925 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8926 int passreg = (pass == 0) ? rn : rm;
8928 read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8929 read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8930 tcg_res[pass] = tcg_temp_new_i64();
8932 switch (opcode) {
8933 case 0x17: /* ADDP */
8934 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
8935 break;
8936 case 0x58: /* FMAXNMP */
8937 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8938 break;
8939 case 0x5a: /* FADDP */
8940 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8941 break;
8942 case 0x5e: /* FMAXP */
8943 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8944 break;
8945 case 0x78: /* FMINNMP */
8946 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8947 break;
8948 case 0x7e: /* FMINP */
8949 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8950 break;
8951 default:
8952 g_assert_not_reached();
8955 tcg_temp_free_i64(tcg_op1);
8956 tcg_temp_free_i64(tcg_op2);
8959 for (pass = 0; pass < 2; pass++) {
8960 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8961 tcg_temp_free_i64(tcg_res[pass]);
8963 } else {
8964 int maxpass = is_q ? 4 : 2;
8965 TCGv_i32 tcg_res[4];
8967 for (pass = 0; pass < maxpass; pass++) {
8968 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8969 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8970 NeonGenTwoOpFn *genfn = NULL;
8971 int passreg = pass < (maxpass / 2) ? rn : rm;
8972 int passelt = (is_q && (pass & 1)) ? 2 : 0;
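/* The first half of the passes reads adjacent element pairs from Rn, the
 * second half from Rm; passelt selects the low or high pair within the
 * chosen source register.
 */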
8974 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
8975 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
8976 tcg_res[pass] = tcg_temp_new_i32();
8978 switch (opcode) {
8979 case 0x17: /* ADDP */
8981 static NeonGenTwoOpFn * const fns[3] = {
8982 gen_helper_neon_padd_u8,
8983 gen_helper_neon_padd_u16,
8984 tcg_gen_add_i32,
8986 genfn = fns[size];
8987 break;
8989 case 0x14: /* SMAXP, UMAXP */
8991 static NeonGenTwoOpFn * const fns[3][2] = {
8992 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
8993 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
8994 { gen_max_s32, gen_max_u32 },
8996 genfn = fns[size][u];
8997 break;
8999 case 0x15: /* SMINP, UMINP */
9001 static NeonGenTwoOpFn * const fns[3][2] = {
9002 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9003 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9004 { gen_min_s32, gen_min_u32 },
9006 genfn = fns[size][u];
9007 break;
9009 /* The FP operations are all on single floats (32 bit) */
9010 case 0x58: /* FMAXNMP */
9011 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9012 break;
9013 case 0x5a: /* FADDP */
9014 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9015 break;
9016 case 0x5e: /* FMAXP */
9017 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9018 break;
9019 case 0x78: /* FMINNMP */
9020 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9021 break;
9022 case 0x7e: /* FMINP */
9023 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9024 break;
9025 default:
9026 g_assert_not_reached();
9029 /* FP ops called directly, otherwise call now */
9030 if (genfn) {
9031 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9034 tcg_temp_free_i32(tcg_op1);
9035 tcg_temp_free_i32(tcg_op2);
9038 for (pass = 0; pass < maxpass; pass++) {
9039 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9040 tcg_temp_free_i32(tcg_res[pass]);
9042 if (!is_q) {
9043 clear_vec_high(s, rd);
9047 if (!TCGV_IS_UNUSED_PTR(fpst)) {
9048 tcg_temp_free_ptr(fpst);
9052 /* Floating point op subgroup of C3.6.16. */
9053 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9055 /* For floating point ops, the U, size[1] and opcode bits
9056 * together indicate the operation. size[0] indicates single
9057 * or double.
9059 int fpopcode = extract32(insn, 11, 5)
9060 | (extract32(insn, 23, 1) << 5)
9061 | (extract32(insn, 29, 1) << 6);
9062 int is_q = extract32(insn, 30, 1);
9063 int size = extract32(insn, 22, 1);
9064 int rm = extract32(insn, 16, 5);
9065 int rn = extract32(insn, 5, 5);
9066 int rd = extract32(insn, 0, 5);
9068 int datasize = is_q ? 128 : 64;
9069 int esize = 32 << size;
9070 int elements = datasize / esize;
9072 if (size == 1 && !is_q) {
9073 unallocated_encoding(s);
9074 return;
9077 switch (fpopcode) {
9078 case 0x58: /* FMAXNMP */
9079 case 0x5a: /* FADDP */
9080 case 0x5e: /* FMAXP */
9081 case 0x78: /* FMINNMP */
9082 case 0x7e: /* FMINP */
9083 if (size && !is_q) {
9084 unallocated_encoding(s);
9085 return;
9087 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9088 rn, rm, rd);
9089 return;
9090 case 0x1b: /* FMULX */
9091 case 0x1f: /* FRECPS */
9092 case 0x3f: /* FRSQRTS */
9093 case 0x5d: /* FACGE */
9094 case 0x7d: /* FACGT */
9095 case 0x19: /* FMLA */
9096 case 0x39: /* FMLS */
9097 case 0x18: /* FMAXNM */
9098 case 0x1a: /* FADD */
9099 case 0x1c: /* FCMEQ */
9100 case 0x1e: /* FMAX */
9101 case 0x38: /* FMINNM */
9102 case 0x3a: /* FSUB */
9103 case 0x3e: /* FMIN */
9104 case 0x5b: /* FMUL */
9105 case 0x5c: /* FCMGE */
9106 case 0x5f: /* FDIV */
9107 case 0x7a: /* FABD */
9108 case 0x7c: /* FCMGT */
9109 if (!fp_access_check(s)) {
9110 return;
9113 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9114 return;
9115 default:
9116 unallocated_encoding(s);
9117 return;
9121 /* Integer op subgroup of C3.6.16. */
9122 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9124 int is_q = extract32(insn, 30, 1);
9125 int u = extract32(insn, 29, 1);
9126 int size = extract32(insn, 22, 2);
9127 int opcode = extract32(insn, 11, 5);
9128 int rm = extract32(insn, 16, 5);
9129 int rn = extract32(insn, 5, 5);
9130 int rd = extract32(insn, 0, 5);
9131 int pass;
9133 switch (opcode) {
9134 case 0x13: /* MUL, PMUL */
9135 if (u && size != 0) {
9136 unallocated_encoding(s);
9137 return;
9139 /* fall through */
9140 case 0x0: /* SHADD, UHADD */
9141 case 0x2: /* SRHADD, URHADD */
9142 case 0x4: /* SHSUB, UHSUB */
9143 case 0xc: /* SMAX, UMAX */
9144 case 0xd: /* SMIN, UMIN */
9145 case 0xe: /* SABD, UABD */
9146 case 0xf: /* SABA, UABA */
9147 case 0x12: /* MLA, MLS */
9148 if (size == 3) {
9149 unallocated_encoding(s);
9150 return;
9152 break;
9153 case 0x16: /* SQDMULH, SQRDMULH */
9154 if (size == 0 || size == 3) {
9155 unallocated_encoding(s);
9156 return;
9158 break;
9159 default:
9160 if (size == 3 && !is_q) {
9161 unallocated_encoding(s);
9162 return;
9164 break;
9167 if (!fp_access_check(s)) {
9168 return;
9171 if (size == 3) {
9172 assert(is_q);
9173 for (pass = 0; pass < 2; pass++) {
9174 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9175 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9176 TCGv_i64 tcg_res = tcg_temp_new_i64();
9178 read_vec_element(s, tcg_op1, rn, pass, MO_64);
9179 read_vec_element(s, tcg_op2, rm, pass, MO_64);
9181 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9183 write_vec_element(s, tcg_res, rd, pass, MO_64);
9185 tcg_temp_free_i64(tcg_res);
9186 tcg_temp_free_i64(tcg_op1);
9187 tcg_temp_free_i64(tcg_op2);
9189 } else {
9190 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9191 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9192 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9193 TCGv_i32 tcg_res = tcg_temp_new_i32();
9194 NeonGenTwoOpFn *genfn = NULL;
9195 NeonGenTwoOpEnvFn *genenvfn = NULL;
9197 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9198 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9200 switch (opcode) {
9201 case 0x0: /* SHADD, UHADD */
9203 static NeonGenTwoOpFn * const fns[3][2] = {
9204 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9205 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9206 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9208 genfn = fns[size][u];
9209 break;
9211 case 0x1: /* SQADD, UQADD */
9213 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9214 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9215 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9216 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9218 genenvfn = fns[size][u];
9219 break;
9221 case 0x2: /* SRHADD, URHADD */
9223 static NeonGenTwoOpFn * const fns[3][2] = {
9224 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9225 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9226 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9228 genfn = fns[size][u];
9229 break;
9231 case 0x4: /* SHSUB, UHSUB */
9233 static NeonGenTwoOpFn * const fns[3][2] = {
9234 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9235 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9236 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9238 genfn = fns[size][u];
9239 break;
9241 case 0x5: /* SQSUB, UQSUB */
9243 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9244 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9245 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9246 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9248 genenvfn = fns[size][u];
9249 break;
9251 case 0x6: /* CMGT, CMHI */
9253 static NeonGenTwoOpFn * const fns[3][2] = {
9254 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9255 { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9256 { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9258 genfn = fns[size][u];
9259 break;
9261 case 0x7: /* CMGE, CMHS */
9263 static NeonGenTwoOpFn * const fns[3][2] = {
9264 { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9265 { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9266 { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9268 genfn = fns[size][u];
9269 break;
9271 case 0x8: /* SSHL, USHL */
9273 static NeonGenTwoOpFn * const fns[3][2] = {
9274 { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9275 { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9276 { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9278 genfn = fns[size][u];
9279 break;
9281 case 0x9: /* SQSHL, UQSHL */
9283 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9284 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9285 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9286 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9288 genenvfn = fns[size][u];
9289 break;
9291 case 0xa: /* SRSHL, URSHL */
9293 static NeonGenTwoOpFn * const fns[3][2] = {
9294 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9295 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9296 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9298 genfn = fns[size][u];
9299 break;
9301 case 0xb: /* SQRSHL, UQRSHL */
9303 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9304 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9305 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9306 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9308 genenvfn = fns[size][u];
9309 break;
9311 case 0xc: /* SMAX, UMAX */
9313 static NeonGenTwoOpFn * const fns[3][2] = {
9314 { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9315 { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9316 { gen_max_s32, gen_max_u32 },
9318 genfn = fns[size][u];
9319 break;
9322 case 0xd: /* SMIN, UMIN */
9324 static NeonGenTwoOpFn * const fns[3][2] = {
9325 { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9326 { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9327 { gen_min_s32, gen_min_u32 },
9329 genfn = fns[size][u];
9330 break;
9332 case 0xe: /* SABD, UABD */
9333 case 0xf: /* SABA, UABA */
9335 static NeonGenTwoOpFn * const fns[3][2] = {
9336 { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9337 { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9338 { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9340 genfn = fns[size][u];
9341 break;
9343 case 0x10: /* ADD, SUB */
9345 static NeonGenTwoOpFn * const fns[3][2] = {
9346 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9347 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9348 { tcg_gen_add_i32, tcg_gen_sub_i32 },
9350 genfn = fns[size][u];
9351 break;
9353 case 0x11: /* CMTST, CMEQ */
9355 static NeonGenTwoOpFn * const fns[3][2] = {
9356 { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9357 { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9358 { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9360 genfn = fns[size][u];
9361 break;
9363 case 0x13: /* MUL, PMUL */
9364 if (u) {
9365 /* PMUL */
9366 assert(size == 0);
9367 genfn = gen_helper_neon_mul_p8;
9368 break;
9370 /* fall through : MUL */
9371 case 0x12: /* MLA, MLS */
9373 static NeonGenTwoOpFn * const fns[3] = {
9374 gen_helper_neon_mul_u8,
9375 gen_helper_neon_mul_u16,
9376 tcg_gen_mul_i32,
9378 genfn = fns[size];
9379 break;
9381 case 0x16: /* SQDMULH, SQRDMULH */
9383 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9384 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9385 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9387 assert(size == 1 || size == 2);
9388 genenvfn = fns[size - 1][u];
9389 break;
9391 default:
9392 g_assert_not_reached();
9395 if (genenvfn) {
9396 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9397 } else {
9398 genfn(tcg_res, tcg_op1, tcg_op2);
9401 if (opcode == 0xf || opcode == 0x12) {
9402 /* SABA, UABA, MLA, MLS: accumulating ops */
9403 static NeonGenTwoOpFn * const fns[3][2] = {
9404 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9405 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9406 { tcg_gen_add_i32, tcg_gen_sub_i32 },
9408 bool is_sub = (opcode == 0x12 && u); /* MLS */
9410 genfn = fns[size][is_sub];
9411 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9412 genfn(tcg_res, tcg_op1, tcg_res);
9415 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9417 tcg_temp_free_i32(tcg_res);
9418 tcg_temp_free_i32(tcg_op1);
9419 tcg_temp_free_i32(tcg_op2);
9423 if (!is_q) {
9424 clear_vec_high(s, rd);
9428 /* C3.6.16 AdvSIMD three same
9429 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
9430 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9431 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
9432 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9434 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9436 int opcode = extract32(insn, 11, 5);
9438 switch (opcode) {
9439 case 0x3: /* logic ops */
9440 disas_simd_3same_logic(s, insn);
9441 break;
9442 case 0x17: /* ADDP */
9443 case 0x14: /* SMAXP, UMAXP */
9444 case 0x15: /* SMINP, UMINP */
9446 /* Pairwise operations */
9447 int is_q = extract32(insn, 30, 1);
9448 int u = extract32(insn, 29, 1);
9449 int size = extract32(insn, 22, 2);
9450 int rm = extract32(insn, 16, 5);
9451 int rn = extract32(insn, 5, 5);
9452 int rd = extract32(insn, 0, 5);
9453 if (opcode == 0x17) {
9454 if (u || (size == 3 && !is_q)) {
9455 unallocated_encoding(s);
9456 return;
9458 } else {
9459 if (size == 3) {
9460 unallocated_encoding(s);
9461 return;
9464 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9465 break;
9467 case 0x18 ... 0x31:
9468 /* floating point ops, sz[1] and U are part of opcode */
9469 disas_simd_3same_float(s, insn);
9470 break;
9471 default:
9472 disas_simd_3same_int(s, insn);
9473 break;
9477 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9478 int size, int rn, int rd)
9480 /* Handle 2-reg-misc ops which are widening (so each size element
9481  * in the source becomes a 2*size element in the destination).
9482 * The only instruction like this is FCVTL.
9484 int pass;
9486 if (size == 3) {
9487 /* 32 -> 64 bit fp conversion */
9488 TCGv_i64 tcg_res[2];
9489 int srcelt = is_q ? 2 : 0;
9491 for (pass = 0; pass < 2; pass++) {
9492 TCGv_i32 tcg_op = tcg_temp_new_i32();
9493 tcg_res[pass] = tcg_temp_new_i64();
9495 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9496 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9497 tcg_temp_free_i32(tcg_op);
9499 for (pass = 0; pass < 2; pass++) {
9500 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9501 tcg_temp_free_i64(tcg_res[pass]);
9503 } else {
9504 /* 16 -> 32 bit fp conversion */
9505 int srcelt = is_q ? 4 : 0;
9506 TCGv_i32 tcg_res[4];
9508 for (pass = 0; pass < 4; pass++) {
9509 tcg_res[pass] = tcg_temp_new_i32();
9511 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9512 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9513 cpu_env);
9515 for (pass = 0; pass < 4; pass++) {
9516 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9517 tcg_temp_free_i32(tcg_res[pass]);
9522 static void handle_rev(DisasContext *s, int opcode, bool u,
9523 bool is_q, int size, int rn, int rd)
9525 int op = (opcode << 1) | u;
9526 int opsz = op + size;
9527 int grp_size = 3 - opsz;
9528 int dsize = is_q ? 128 : 64;
9529 int i;
9531 if (opsz >= 3) {
9532 unallocated_encoding(s);
9533 return;
9536 if (!fp_access_check(s)) {
9537 return;
9540 if (size == 0) {
9541 /* Special case bytes, use bswap op on each group of elements */
9542 int groups = dsize / (8 << grp_size);
9544 for (i = 0; i < groups; i++) {
9545 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9547 read_vec_element(s, tcg_tmp, rn, i, grp_size);
9548 switch (grp_size) {
9549 case MO_16:
9550 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9551 break;
9552 case MO_32:
9553 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9554 break;
9555 case MO_64:
9556 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9557 break;
9558 default:
9559 g_assert_not_reached();
9561 write_vec_element(s, tcg_tmp, rd, i, grp_size);
9562 tcg_temp_free_i64(tcg_tmp);
9564 if (!is_q) {
9565 clear_vec_high(s, rd);
9567 } else {
9568 int revmask = (1 << grp_size) - 1;
9569 int esize = 8 << size;
9570 int elements = dsize / esize;
9571 TCGv_i64 tcg_rn = tcg_temp_new_i64();
9572 TCGv_i64 tcg_rd = tcg_const_i64(0);
9573 TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
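/* XORing the element index with revmask reverses the element order within
 * each group of (1 << grp_size) elements; the reversed elements are then
 * deposited into the low and high halves of the 128-bit result.
 */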
9575 for (i = 0; i < elements; i++) {
9576 int e_rev = (i & 0xf) ^ revmask;
9577 int off = e_rev * esize;
9578 read_vec_element(s, tcg_rn, rn, i, size);
9579 if (off >= 64) {
9580 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9581 tcg_rn, off - 64, esize);
9582 } else {
9583 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9586 write_vec_element(s, tcg_rd, rd, 0, MO_64);
9587 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9589 tcg_temp_free_i64(tcg_rd_hi);
9590 tcg_temp_free_i64(tcg_rd);
9591 tcg_temp_free_i64(tcg_rn);
9595 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9596 bool is_q, int size, int rn, int rd)
9598 /* Implement the pairwise operations from 2-misc:
9599 * SADDLP, UADDLP, SADALP, UADALP.
9600 * These all add pairs of elements in the input to produce a
9601 * double-width result element in the output (possibly accumulating).
9603 bool accum = (opcode == 0x6);
9604 int maxpass = is_q ? 2 : 1;
9605 int pass;
9606 TCGv_i64 tcg_res[2];
9608 if (size == 2) {
9609 /* 32 + 32 -> 64 op */
9610 TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9612 for (pass = 0; pass < maxpass; pass++) {
9613 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9614 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9616 tcg_res[pass] = tcg_temp_new_i64();
9618 read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9619 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9620 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9621 if (accum) {
9622 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9623 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9626 tcg_temp_free_i64(tcg_op1);
9627 tcg_temp_free_i64(tcg_op2);
9629 } else {
9630 for (pass = 0; pass < maxpass; pass++) {
9631 TCGv_i64 tcg_op = tcg_temp_new_i64();
9632 NeonGenOneOpFn *genfn;
9633 static NeonGenOneOpFn * const fns[2][2] = {
9634 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
9635 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
9638 genfn = fns[size][u];
9640 tcg_res[pass] = tcg_temp_new_i64();
9642 read_vec_element(s, tcg_op, rn, pass, MO_64);
9643 genfn(tcg_res[pass], tcg_op);
9645 if (accum) {
9646 read_vec_element(s, tcg_op, rd, pass, MO_64);
9647 if (size == 0) {
9648 gen_helper_neon_addl_u16(tcg_res[pass],
9649 tcg_res[pass], tcg_op);
9650 } else {
9651 gen_helper_neon_addl_u32(tcg_res[pass],
9652 tcg_res[pass], tcg_op);
9655 tcg_temp_free_i64(tcg_op);
9658 if (!is_q) {
9659 tcg_res[1] = tcg_const_i64(0);
9661 for (pass = 0; pass < 2; pass++) {
9662 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9663 tcg_temp_free_i64(tcg_res[pass]);
9667 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9669 /* Implement SHLL and SHLL2 */
9670 int pass;
9671 int part = is_q ? 2 : 0;
9672 TCGv_i64 tcg_res[2];
9674 for (pass = 0; pass < 2; pass++) {
9675 static NeonGenWidenFn * const widenfns[3] = {
9676 gen_helper_neon_widen_u8,
9677 gen_helper_neon_widen_u16,
9678 tcg_gen_extu_i32_i64,
9680 NeonGenWidenFn *widenfn = widenfns[size];
9681 TCGv_i32 tcg_op = tcg_temp_new_i32();
9683 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9684 tcg_res[pass] = tcg_temp_new_i64();
9685 widenfn(tcg_res[pass], tcg_op);
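/* SHLL always shifts the widened element left by the source element size
 * (8 << size bits).
 */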
9686 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9688 tcg_temp_free_i32(tcg_op);
9691 for (pass = 0; pass < 2; pass++) {
9692 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9693 tcg_temp_free_i64(tcg_res[pass]);
9697 /* C3.6.17 AdvSIMD two reg misc
9698 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
9699 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9700 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
9701 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9703 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9705 int size = extract32(insn, 22, 2);
9706 int opcode = extract32(insn, 12, 5);
9707 bool u = extract32(insn, 29, 1);
9708 bool is_q = extract32(insn, 30, 1);
9709 int rn = extract32(insn, 5, 5);
9710 int rd = extract32(insn, 0, 5);
9711 bool need_fpstatus = false;
9712 bool need_rmode = false;
9713 int rmode = -1;
9714 TCGv_i32 tcg_rmode;
9715 TCGv_ptr tcg_fpstatus;
9717 switch (opcode) {
9718 case 0x0: /* REV64, REV32 */
9719 case 0x1: /* REV16 */
9720 handle_rev(s, opcode, u, is_q, size, rn, rd);
9721 return;
9722 case 0x5: /* CNT, NOT, RBIT */
9723 if (u && size == 0) {
9724 /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9725 size = 3;
9726 break;
9727 } else if (u && size == 1) {
9728 /* RBIT */
9729 break;
9730 } else if (!u && size == 0) {
9731 /* CNT */
9732 break;
9734 unallocated_encoding(s);
9735 return;
9736 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9737 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9738 if (size == 3) {
9739 unallocated_encoding(s);
9740 return;
9742 if (!fp_access_check(s)) {
9743 return;
9746 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9747 return;
9748 case 0x4: /* CLS, CLZ */
9749 if (size == 3) {
9750 unallocated_encoding(s);
9751 return;
9753 break;
9754 case 0x2: /* SADDLP, UADDLP */
9755 case 0x6: /* SADALP, UADALP */
9756 if (size == 3) {
9757 unallocated_encoding(s);
9758 return;
9760 if (!fp_access_check(s)) {
9761 return;
9763 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9764 return;
9765 case 0x13: /* SHLL, SHLL2 */
9766 if (u == 0 || size == 3) {
9767 unallocated_encoding(s);
9768 return;
9770 if (!fp_access_check(s)) {
9771 return;
9773 handle_shll(s, is_q, size, rn, rd);
9774 return;
9775 case 0xa: /* CMLT */
9776 if (u == 1) {
9777 unallocated_encoding(s);
9778 return;
9780 /* fall through */
9781 case 0x8: /* CMGT, CMGE */
9782 case 0x9: /* CMEQ, CMLE */
9783 case 0xb: /* ABS, NEG */
9784 if (size == 3 && !is_q) {
9785 unallocated_encoding(s);
9786 return;
9788 break;
9789 case 0x3: /* SUQADD, USQADD */
9790 if (size == 3 && !is_q) {
9791 unallocated_encoding(s);
9792 return;
9794 if (!fp_access_check(s)) {
9795 return;
9797 handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9798 return;
9799 case 0x7: /* SQABS, SQNEG */
9800 if (size == 3 && !is_q) {
9801 unallocated_encoding(s);
9802 return;
9804 break;
9805 case 0xc ... 0xf:
9806 case 0x16 ... 0x1d:
9807 case 0x1f:
9809 /* Floating point: U, size[1] and opcode indicate operation;
9810 * size[0] indicates single or double precision.
9812 int is_double = extract32(size, 0, 1);
9813 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9814 size = is_double ? 3 : 2;
9815 switch (opcode) {
9816 case 0x2f: /* FABS */
9817 case 0x6f: /* FNEG */
9818 if (size == 3 && !is_q) {
9819 unallocated_encoding(s);
9820 return;
9822 break;
9823 case 0x1d: /* SCVTF */
9824 case 0x5d: /* UCVTF */
9826             bool is_signed = (opcode == 0x1d);
9827 int elements = is_double ? 2 : is_q ? 4 : 2;
9828 if (is_double && !is_q) {
9829 unallocated_encoding(s);
9830 return;
9832 if (!fp_access_check(s)) {
9833 return;
9835 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9836 return;
9838 case 0x2c: /* FCMGT (zero) */
9839 case 0x2d: /* FCMEQ (zero) */
9840 case 0x2e: /* FCMLT (zero) */
9841 case 0x6c: /* FCMGE (zero) */
9842 case 0x6d: /* FCMLE (zero) */
9843 if (size == 3 && !is_q) {
9844 unallocated_encoding(s);
9845 return;
9847 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9848 return;
9849 case 0x7f: /* FSQRT */
9850 if (size == 3 && !is_q) {
9851 unallocated_encoding(s);
9852 return;
9854 break;
9855 case 0x1a: /* FCVTNS */
9856 case 0x1b: /* FCVTMS */
9857 case 0x3a: /* FCVTPS */
9858 case 0x3b: /* FCVTZS */
9859 case 0x5a: /* FCVTNU */
9860 case 0x5b: /* FCVTMU */
9861 case 0x7a: /* FCVTPU */
9862 case 0x7b: /* FCVTZU */
9863 need_fpstatus = true;
9864 need_rmode = true;
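/* Bits 0 and 5 of the opcode select the rounding mode for these
 * conversions.
 */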
9865 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9866 if (size == 3 && !is_q) {
9867 unallocated_encoding(s);
9868 return;
9870 break;
9871 case 0x5c: /* FCVTAU */
9872 case 0x1c: /* FCVTAS */
9873 need_fpstatus = true;
9874 need_rmode = true;
9875 rmode = FPROUNDING_TIEAWAY;
9876 if (size == 3 && !is_q) {
9877 unallocated_encoding(s);
9878 return;
9880 break;
9881 case 0x3c: /* URECPE */
9882 if (size == 3) {
9883 unallocated_encoding(s);
9884 return;
9886 /* fall through */
9887 case 0x3d: /* FRECPE */
9888 case 0x7d: /* FRSQRTE */
9889 if (size == 3 && !is_q) {
9890 unallocated_encoding(s);
9891 return;
9893 if (!fp_access_check(s)) {
9894 return;
9896 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9897 return;
9898 case 0x56: /* FCVTXN, FCVTXN2 */
9899 if (size == 2) {
9900 unallocated_encoding(s);
9901 return;
9903 /* fall through */
9904 case 0x16: /* FCVTN, FCVTN2 */
9905 /* handle_2misc_narrow does a 2*size -> size operation, but these
9906 * instructions encode the source size rather than dest size.
9908 if (!fp_access_check(s)) {
9909 return;
9911 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9912 return;
9913 case 0x17: /* FCVTL, FCVTL2 */
9914 if (!fp_access_check(s)) {
9915 return;
9917 handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9918 return;
9919 case 0x18: /* FRINTN */
9920 case 0x19: /* FRINTM */
9921 case 0x38: /* FRINTP */
9922 case 0x39: /* FRINTZ */
9923 need_rmode = true;
9924 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9925 /* fall through */
9926 case 0x59: /* FRINTX */
9927 case 0x79: /* FRINTI */
9928 need_fpstatus = true;
9929 if (size == 3 && !is_q) {
9930 unallocated_encoding(s);
9931 return;
9933 break;
9934 case 0x58: /* FRINTA */
9935 need_rmode = true;
9936 rmode = FPROUNDING_TIEAWAY;
9937 need_fpstatus = true;
9938 if (size == 3 && !is_q) {
9939 unallocated_encoding(s);
9940 return;
9942 break;
9943 case 0x7c: /* URSQRTE */
9944 if (size == 3) {
9945 unallocated_encoding(s);
9946 return;
9948 need_fpstatus = true;
9949 break;
9950 default:
9951 unallocated_encoding(s);
9952 return;
9954 break;
9956 default:
9957 unallocated_encoding(s);
9958 return;
9961 if (!fp_access_check(s)) {
9962 return;
9965 if (need_fpstatus) {
9966 tcg_fpstatus = get_fpstatus_ptr();
9967 } else {
9968 TCGV_UNUSED_PTR(tcg_fpstatus);
9970 if (need_rmode) {
9971 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
9972 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
9973 } else {
9974 TCGV_UNUSED_I32(tcg_rmode);
9977 if (size == 3) {
9978 /* All 64-bit element operations can be shared with scalar 2misc */
9979 int pass;
9981 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
9982 TCGv_i64 tcg_op = tcg_temp_new_i64();
9983 TCGv_i64 tcg_res = tcg_temp_new_i64();
9985 read_vec_element(s, tcg_op, rn, pass, MO_64);
9987 handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
9988 tcg_rmode, tcg_fpstatus);
9990 write_vec_element(s, tcg_res, rd, pass, MO_64);
9992 tcg_temp_free_i64(tcg_res);
9993 tcg_temp_free_i64(tcg_op);
9995 } else {
9996 int pass;
9998 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9999 TCGv_i32 tcg_op = tcg_temp_new_i32();
10000 TCGv_i32 tcg_res = tcg_temp_new_i32();
10001 TCGCond cond;
10003 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10005 if (size == 2) {
10006 /* Special cases for 32 bit elements */
10007 switch (opcode) {
10008 case 0xa: /* CMLT */
10009 /* 32 bit integer comparison against zero, result is
10010 * test ? (2^32 - 1) : 0. We implement via setcond(test)
10011                  * and then negating the 0/1 result to give 0 or all-ones.
10013 cond = TCG_COND_LT;
10014 do_cmop:
10015 tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10016 tcg_gen_neg_i32(tcg_res, tcg_res);
10017 break;
10018 case 0x8: /* CMGT, CMGE */
10019 cond = u ? TCG_COND_GE : TCG_COND_GT;
10020 goto do_cmop;
10021 case 0x9: /* CMEQ, CMLE */
10022 cond = u ? TCG_COND_LE : TCG_COND_EQ;
10023 goto do_cmop;
10024 case 0x4: /* CLS */
10025 if (u) {
10026 gen_helper_clz32(tcg_res, tcg_op);
10027 } else {
10028 gen_helper_cls32(tcg_res, tcg_op);
10030 break;
10031 case 0x7: /* SQABS, SQNEG */
10032 if (u) {
10033 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10034 } else {
10035 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10037 break;
10038 case 0xb: /* ABS, NEG */
10039 if (u) {
10040 tcg_gen_neg_i32(tcg_res, tcg_op);
10041 } else {
10042 TCGv_i32 tcg_zero = tcg_const_i32(0);
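/* ABS: negate, then use movcond to keep the original value where it was
 * already positive.
 */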
10043 tcg_gen_neg_i32(tcg_res, tcg_op);
10044 tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10045 tcg_zero, tcg_op, tcg_res);
10046 tcg_temp_free_i32(tcg_zero);
10048 break;
10049 case 0x2f: /* FABS */
10050 gen_helper_vfp_abss(tcg_res, tcg_op);
10051 break;
10052 case 0x6f: /* FNEG */
10053 gen_helper_vfp_negs(tcg_res, tcg_op);
10054 break;
10055 case 0x7f: /* FSQRT */
10056 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10057 break;
10058 case 0x1a: /* FCVTNS */
10059 case 0x1b: /* FCVTMS */
10060 case 0x1c: /* FCVTAS */
10061 case 0x3a: /* FCVTPS */
10062 case 0x3b: /* FCVTZS */
10064 TCGv_i32 tcg_shift = tcg_const_i32(0);
10065 gen_helper_vfp_tosls(tcg_res, tcg_op,
10066 tcg_shift, tcg_fpstatus);
10067 tcg_temp_free_i32(tcg_shift);
10068 break;
10070 case 0x5a: /* FCVTNU */
10071 case 0x5b: /* FCVTMU */
10072 case 0x5c: /* FCVTAU */
10073 case 0x7a: /* FCVTPU */
10074 case 0x7b: /* FCVTZU */
10076 TCGv_i32 tcg_shift = tcg_const_i32(0);
10077 gen_helper_vfp_touls(tcg_res, tcg_op,
10078 tcg_shift, tcg_fpstatus);
10079 tcg_temp_free_i32(tcg_shift);
10080 break;
10082 case 0x18: /* FRINTN */
10083 case 0x19: /* FRINTM */
10084 case 0x38: /* FRINTP */
10085 case 0x39: /* FRINTZ */
10086 case 0x58: /* FRINTA */
10087 case 0x79: /* FRINTI */
10088 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10089 break;
10090 case 0x59: /* FRINTX */
10091 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10092 break;
10093 case 0x7c: /* URSQRTE */
10094 gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10095 break;
10096 default:
10097 g_assert_not_reached();
10099 } else {
10100 /* Use helpers for 8 and 16 bit elements */
10101 switch (opcode) {
10102 case 0x5: /* CNT, RBIT */
10103 /* For these two insns size is part of the opcode specifier
10104 * (handled earlier); they always operate on byte elements.
10106 if (u) {
10107 gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10108 } else {
10109 gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10111 break;
10112 case 0x7: /* SQABS, SQNEG */
10114 NeonGenOneOpEnvFn *genfn;
10115 static NeonGenOneOpEnvFn * const fns[2][2] = {
10116 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10117 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10119 genfn = fns[size][u];
10120 genfn(tcg_res, cpu_env, tcg_op);
10121 break;
10123 case 0x8: /* CMGT, CMGE */
10124 case 0x9: /* CMEQ, CMLE */
10125 case 0xa: /* CMLT */
10127 static NeonGenTwoOpFn * const fns[3][2] = {
10128 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10129 { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10130 { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10132 NeonGenTwoOpFn *genfn;
10133 int comp;
10134 bool reverse;
10135 TCGv_i32 tcg_zero = tcg_const_i32(0);
10137 /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10138 comp = (opcode - 0x8) * 2 + u;
10139 /* ...but LE, LT are implemented as reverse GE, GT */
10140 reverse = (comp > 2);
10141 if (reverse) {
10142 comp = 4 - comp;
10144 genfn = fns[comp][size];
10145 if (reverse) {
10146 genfn(tcg_res, tcg_zero, tcg_op);
10147 } else {
10148 genfn(tcg_res, tcg_op, tcg_zero);
10150 tcg_temp_free_i32(tcg_zero);
10151 break;
10153 case 0xb: /* ABS, NEG */
10154 if (u) {
10155 TCGv_i32 tcg_zero = tcg_const_i32(0);
10156 if (size) {
10157 gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10158 } else {
10159 gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10161 tcg_temp_free_i32(tcg_zero);
10162 } else {
10163 if (size) {
10164 gen_helper_neon_abs_s16(tcg_res, tcg_op);
10165 } else {
10166 gen_helper_neon_abs_s8(tcg_res, tcg_op);
10169 break;
10170 case 0x4: /* CLS, CLZ */
10171 if (u) {
10172 if (size == 0) {
10173 gen_helper_neon_clz_u8(tcg_res, tcg_op);
10174 } else {
10175 gen_helper_neon_clz_u16(tcg_res, tcg_op);
10177 } else {
10178 if (size == 0) {
10179 gen_helper_neon_cls_s8(tcg_res, tcg_op);
10180 } else {
10181 gen_helper_neon_cls_s16(tcg_res, tcg_op);
10184 break;
10185 default:
10186 g_assert_not_reached();
10190 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10192 tcg_temp_free_i32(tcg_res);
10193 tcg_temp_free_i32(tcg_op);
10196 if (!is_q) {
10197 clear_vec_high(s, rd);
10200 if (need_rmode) {
10201 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10202 tcg_temp_free_i32(tcg_rmode);
10204 if (need_fpstatus) {
10205 tcg_temp_free_ptr(tcg_fpstatus);
10209 /* C3.6.13 AdvSIMD scalar x indexed element
10210 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
10211 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10212 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
10213 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10214 * C3.6.18 AdvSIMD vector x indexed element
10215 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
10216 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10217 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
10218 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10220 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10222 /* This encoding has two kinds of instruction:
10223 * normal, where we perform elt x idxelt => elt for each
10224 * element in the vector
10225 * long, where we perform elt x idxelt and generate a result of
10226  * double the width of the input element.
10227 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10229 bool is_scalar = extract32(insn, 28, 1);
10230 bool is_q = extract32(insn, 30, 1);
10231 bool u = extract32(insn, 29, 1);
10232 int size = extract32(insn, 22, 2);
10233 int l = extract32(insn, 21, 1);
10234 int m = extract32(insn, 20, 1);
10235 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10236 int rm = extract32(insn, 16, 4);
10237 int opcode = extract32(insn, 12, 4);
10238 int h = extract32(insn, 11, 1);
10239 int rn = extract32(insn, 5, 5);
10240 int rd = extract32(insn, 0, 5);
10241 bool is_long = false;
10242 bool is_fp = false;
10243 int index;
10244 TCGv_ptr fpst;
10246 switch (opcode) {
10247 case 0x0: /* MLA */
10248 case 0x4: /* MLS */
10249 if (!u || is_scalar) {
10250 unallocated_encoding(s);
10251 return;
10253 break;
10254 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10255 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10256 case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10257 if (is_scalar) {
10258 unallocated_encoding(s);
10259 return;
10261 is_long = true;
10262 break;
10263 case 0x3: /* SQDMLAL, SQDMLAL2 */
10264 case 0x7: /* SQDMLSL, SQDMLSL2 */
10265 case 0xb: /* SQDMULL, SQDMULL2 */
10266 is_long = true;
10267 /* fall through */
10268 case 0xc: /* SQDMULH */
10269 case 0xd: /* SQRDMULH */
10270 if (u) {
10271 unallocated_encoding(s);
10272 return;
10274 break;
10275 case 0x8: /* MUL */
10276 if (u || is_scalar) {
10277 unallocated_encoding(s);
10278 return;
10280 break;
10281 case 0x1: /* FMLA */
10282 case 0x5: /* FMLS */
10283 if (u) {
10284 unallocated_encoding(s);
10285 return;
10287 /* fall through */
10288 case 0x9: /* FMUL, FMULX */
10289 if (!extract32(size, 1, 1)) {
10290 unallocated_encoding(s);
10291 return;
10293 is_fp = true;
10294 break;
10295 default:
10296 unallocated_encoding(s);
10297 return;
10300 if (is_fp) {
10301 /* low bit of size indicates single/double */
10302 size = extract32(size, 0, 1) ? 3 : 2;
10303 if (size == 2) {
10304 index = h << 1 | l;
10305 } else {
10306 if (l || !is_q) {
10307 unallocated_encoding(s);
10308 return;
10310 index = h;
10312 rm |= (m << 4);
10313 } else {
10314 switch (size) {
10315 case 1:
10316 index = h << 2 | l << 1 | m;
10317 break;
10318 case 2:
10319 index = h << 1 | l;
10320 rm |= (m << 4);
10321 break;
10322 default:
10323 unallocated_encoding(s);
10324 return;
10328 if (!fp_access_check(s)) {
10329 return;
10332 if (is_fp) {
10333 fpst = get_fpstatus_ptr();
10334 } else {
10335 TCGV_UNUSED_PTR(fpst);
10338 if (size == 3) {
10339 TCGv_i64 tcg_idx = tcg_temp_new_i64();
10340 int pass;
10342 assert(is_fp && is_q && !is_long);
10344 read_vec_element(s, tcg_idx, rm, index, MO_64);
10346 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10347 TCGv_i64 tcg_op = tcg_temp_new_i64();
10348 TCGv_i64 tcg_res = tcg_temp_new_i64();
10350 read_vec_element(s, tcg_op, rn, pass, MO_64);
10352 switch (opcode) {
10353 case 0x5: /* FMLS */
10354 /* As usual for ARM, separate negation for fused multiply-add */
10355 gen_helper_vfp_negd(tcg_op, tcg_op);
10356 /* fall through */
10357 case 0x1: /* FMLA */
10358 read_vec_element(s, tcg_res, rd, pass, MO_64);
10359 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10360 break;
10361 case 0x9: /* FMUL, FMULX */
10362 if (u) {
10363 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10364 } else {
10365 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10367 break;
10368 default:
10369 g_assert_not_reached();
10372 write_vec_element(s, tcg_res, rd, pass, MO_64);
10373 tcg_temp_free_i64(tcg_op);
10374 tcg_temp_free_i64(tcg_res);
10377 if (is_scalar) {
10378 clear_vec_high(s, rd);
10381 tcg_temp_free_i64(tcg_idx);
10382 } else if (!is_long) {
10383 /* 32 bit floating point, or 16 or 32 bit integer.
10384 * For the 16 bit scalar case we use the usual Neon helpers and
10385 * rely on the fact that 0 op 0 == 0 with no side effects.
10387 TCGv_i32 tcg_idx = tcg_temp_new_i32();
10388 int pass, maxpasses;
10390 if (is_scalar) {
10391 maxpasses = 1;
10392 } else {
10393 maxpasses = is_q ? 4 : 2;
10396 read_vec_element_i32(s, tcg_idx, rm, index, size);
10398 if (size == 1 && !is_scalar) {
10399 /* The simplest way to handle the 16x16 indexed ops is to duplicate
10400 * the index into both halves of the 32 bit tcg_idx and then use
10401 * the usual Neon helpers.
10403 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10406 for (pass = 0; pass < maxpasses; pass++) {
10407 TCGv_i32 tcg_op = tcg_temp_new_i32();
10408 TCGv_i32 tcg_res = tcg_temp_new_i32();
10410 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10412 switch (opcode) {
10413 case 0x0: /* MLA */
10414 case 0x4: /* MLS */
10415 case 0x8: /* MUL */
10417 static NeonGenTwoOpFn * const fns[2][2] = {
10418 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10419 { tcg_gen_add_i32, tcg_gen_sub_i32 },
10421 NeonGenTwoOpFn *genfn;
10422 bool is_sub = opcode == 0x4;
10424 if (size == 1) {
10425 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10426 } else {
10427 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10429 if (opcode == 0x8) {
10430 break;
10432 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10433 genfn = fns[size - 1][is_sub];
10434 genfn(tcg_res, tcg_op, tcg_res);
10435 break;
10437 case 0x5: /* FMLS */
10438 /* As usual for ARM, separate negation for fused multiply-add */
10439 gen_helper_vfp_negs(tcg_op, tcg_op);
10440 /* fall through */
10441 case 0x1: /* FMLA */
10442 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10443 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10444 break;
10445 case 0x9: /* FMUL, FMULX */
10446 if (u) {
10447 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10448 } else {
10449 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10451 break;
10452 case 0xc: /* SQDMULH */
10453 if (size == 1) {
10454 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10455 tcg_op, tcg_idx);
10456 } else {
10457 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10458 tcg_op, tcg_idx);
10460 break;
10461 case 0xd: /* SQRDMULH */
10462 if (size == 1) {
10463 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10464 tcg_op, tcg_idx);
10465 } else {
10466 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10467 tcg_op, tcg_idx);
10469 break;
10470 default:
10471 g_assert_not_reached();
10474 if (is_scalar) {
10475 write_fp_sreg(s, rd, tcg_res);
10476 } else {
10477 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10480 tcg_temp_free_i32(tcg_op);
10481 tcg_temp_free_i32(tcg_res);
10484 tcg_temp_free_i32(tcg_idx);
10486 if (!is_q) {
10487 clear_vec_high(s, rd);
10489 } else {
10490 /* long ops: 16x16->32 or 32x32->64 */
10491 TCGv_i64 tcg_res[2];
10492 int pass;
10493 bool satop = extract32(opcode, 0, 1);
10494 TCGMemOp memop = MO_32;
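        /* The saturating doubling multiplies are always signed; for the
         * others the U bit selects unsigned (U=1) or signed (U=0) widening.
         */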
10496 if (satop || !u) {
10497 memop |= MO_SIGN;
10500 if (size == 2) {
10501 TCGv_i64 tcg_idx = tcg_temp_new_i64();
10503 read_vec_element(s, tcg_idx, rm, index, memop);
10505 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10506 TCGv_i64 tcg_op = tcg_temp_new_i64();
10507 TCGv_i64 tcg_passres;
10508 int passelt;
10510 if (is_scalar) {
10511 passelt = 0;
10512 } else {
10513 passelt = pass + (is_q * 2);
10516 read_vec_element(s, tcg_op, rn, passelt, memop);
10518 tcg_res[pass] = tcg_temp_new_i64();
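                /* The plain multiplies (SMULL/UMULL/SQDMULL) write the product
                 * straight into the result; the accumulating ops build it in a
                 * scratch temp and fold in the old Rd value further down.
                 */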
10520 if (opcode == 0xa || opcode == 0xb) {
10521 /* Non-accumulating ops */
10522 tcg_passres = tcg_res[pass];
10523 } else {
10524 tcg_passres = tcg_temp_new_i64();
10527 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10528 tcg_temp_free_i64(tcg_op);
10530 if (satop) {
10531 /* saturating, doubling */
10532 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10533 tcg_passres, tcg_passres);
10536 if (opcode == 0xa || opcode == 0xb) {
10537 continue;
10540 /* Accumulating op: handle accumulate step */
10541 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10543 switch (opcode) {
10544 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10545 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10546 break;
10547 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10548 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10549 break;
10550 case 0x7: /* SQDMLSL, SQDMLSL2 */
10551 tcg_gen_neg_i64(tcg_passres, tcg_passres);
10552 /* fall through */
10553 case 0x3: /* SQDMLAL, SQDMLAL2 */
10554 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10555 tcg_res[pass],
10556 tcg_passres);
10557 break;
10558 default:
10559 g_assert_not_reached();
10561 tcg_temp_free_i64(tcg_passres);
10563 tcg_temp_free_i64(tcg_idx);
10565 if (is_scalar) {
10566 clear_vec_high(s, rd);
10568 } else {
10569 TCGv_i32 tcg_idx = tcg_temp_new_i32();
10571 assert(size == 1);
10572 read_vec_element_i32(s, tcg_idx, rm, index, size);
10574 if (!is_scalar) {
10575 /* The simplest way to handle the 16x16 indexed ops is to
10576 * duplicate the index into both halves of the 32 bit tcg_idx
10577 * and then use the usual Neon helpers.
10579 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10582 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10583 TCGv_i32 tcg_op = tcg_temp_new_i32();
10584 TCGv_i64 tcg_passres;
10586 if (is_scalar) {
10587 read_vec_element_i32(s, tcg_op, rn, pass, size);
10588 } else {
10589 read_vec_element_i32(s, tcg_op, rn,
10590 pass + (is_q * 2), MO_32);
10593 tcg_res[pass] = tcg_temp_new_i64();
10595 if (opcode == 0xa || opcode == 0xb) {
10596 /* Non-accumulating ops */
10597 tcg_passres = tcg_res[pass];
10598 } else {
10599 tcg_passres = tcg_temp_new_i64();
10602 if (memop & MO_SIGN) {
10603 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10604 } else {
10605 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10607 if (satop) {
10608 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10609 tcg_passres, tcg_passres);
10611 tcg_temp_free_i32(tcg_op);
10613 if (opcode == 0xa || opcode == 0xb) {
10614 continue;
10617 /* Accumulating op: handle accumulate step */
10618 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10620 switch (opcode) {
10621 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10622 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10623 tcg_passres);
10624 break;
10625 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10626 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10627 tcg_passres);
10628 break;
10629 case 0x7: /* SQDMLSL, SQDMLSL2 */
10630 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10631 /* fall through */
10632 case 0x3: /* SQDMLAL, SQDMLAL2 */
10633 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10634 tcg_res[pass],
10635 tcg_passres);
10636 break;
10637 default:
10638 g_assert_not_reached();
10640 tcg_temp_free_i64(tcg_passres);
10642 tcg_temp_free_i32(tcg_idx);
10644 if (is_scalar) {
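                /* Only the low 32 bits of the scalar result are significant;
                 * zero the high half before it is written back as a 64-bit
                 * element below.
                 */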
10645 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10649 if (is_scalar) {
10650 tcg_res[1] = tcg_const_i64(0);
10653 for (pass = 0; pass < 2; pass++) {
10654 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10655 tcg_temp_free_i64(tcg_res[pass]);
10659 if (!TCGV_IS_UNUSED_PTR(fpst)) {
10660 tcg_temp_free_ptr(fpst);
10664 /* C3.6.19 Crypto AES
10665 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
10666 * +-----------------+------+-----------+--------+-----+------+------+
10667 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
10668 * +-----------------+------+-----------+--------+-----+------+------+
10670 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10672 int size = extract32(insn, 22, 2);
10673 int opcode = extract32(insn, 12, 5);
10674 int rn = extract32(insn, 5, 5);
10675 int rd = extract32(insn, 0, 5);
10676 int decrypt;
10677 TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10678 CryptoThreeOpEnvFn *genfn;
10680 if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10681 || size != 0) {
10682 unallocated_encoding(s);
10683 return;
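    /* AESE/AESD share one helper and AESMC/AESIMC another; the decrypt
     * flag tells the helper to apply the inverse (decryption) transform.
     */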
10686 switch (opcode) {
10687 case 0x4: /* AESE */
10688 decrypt = 0;
10689 genfn = gen_helper_crypto_aese;
10690 break;
10691 case 0x6: /* AESMC */
10692 decrypt = 0;
10693 genfn = gen_helper_crypto_aesmc;
10694 break;
10695 case 0x5: /* AESD */
10696 decrypt = 1;
10697 genfn = gen_helper_crypto_aese;
10698 break;
10699 case 0x7: /* AESIMC */
10700 decrypt = 1;
10701 genfn = gen_helper_crypto_aesmc;
10702 break;
10703 default:
10704 unallocated_encoding(s);
10705 return;
10708 /* Note that we convert the Vx register indexes into the
10709 * index within the vfp.regs[] array, so we can share the
10710 * helper with the AArch32 instructions.
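    /* Each 128-bit Vn register occupies two consecutive 64-bit entries in
     * vfp.regs[], hence the factor of two (e.g. V3 maps to vfp.regs[6]).
     */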
10712 tcg_rd_regno = tcg_const_i32(rd << 1);
10713 tcg_rn_regno = tcg_const_i32(rn << 1);
10714 tcg_decrypt = tcg_const_i32(decrypt);
10716 genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10718 tcg_temp_free_i32(tcg_rd_regno);
10719 tcg_temp_free_i32(tcg_rn_regno);
10720 tcg_temp_free_i32(tcg_decrypt);
10723 /* C3.6.20 Crypto three-reg SHA
10724 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
10725 * +-----------------+------+---+------+---+--------+-----+------+------+
10726 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
10727 * +-----------------+------+---+------+---+--------+-----+------+------+
10729 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10731 int size = extract32(insn, 22, 2);
10732 int opcode = extract32(insn, 12, 3);
10733 int rm = extract32(insn, 16, 5);
10734 int rn = extract32(insn, 5, 5);
10735 int rd = extract32(insn, 0, 5);
10736 CryptoThreeOpEnvFn *genfn;
10737 TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10738 int feature = ARM_FEATURE_V8_SHA256;
10740 if (size != 0) {
10741 unallocated_encoding(s);
10742 return;
10745 switch (opcode) {
10746 case 0: /* SHA1C */
10747 case 1: /* SHA1P */
10748 case 2: /* SHA1M */
10749 case 3: /* SHA1SU0 */
10750 genfn = NULL;
10751 feature = ARM_FEATURE_V8_SHA1;
10752 break;
10753 case 4: /* SHA256H */
10754 genfn = gen_helper_crypto_sha256h;
10755 break;
10756 case 5: /* SHA256H2 */
10757 genfn = gen_helper_crypto_sha256h2;
10758 break;
10759 case 6: /* SHA256SU1 */
10760 genfn = gen_helper_crypto_sha256su1;
10761 break;
10762 default:
10763 unallocated_encoding(s);
10764 return;
10767 if (!arm_dc_feature(s, feature)) {
10768 unallocated_encoding(s);
10769 return;
10772 tcg_rd_regno = tcg_const_i32(rd << 1);
10773 tcg_rn_regno = tcg_const_i32(rn << 1);
10774 tcg_rm_regno = tcg_const_i32(rm << 1);
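    /* The SHA256 insns each have a dedicated helper; the SHA1 three-reg
     * insns (genfn == NULL above) share gen_helper_crypto_sha1_3reg, which
     * takes the opcode as an extra argument to select the operation.
     */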
10776 if (genfn) {
10777 genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10778 } else {
10779 TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10781 gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10782 tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10783 tcg_temp_free_i32(tcg_opcode);
10786 tcg_temp_free_i32(tcg_rd_regno);
10787 tcg_temp_free_i32(tcg_rn_regno);
10788 tcg_temp_free_i32(tcg_rm_regno);
10791 /* C3.6.21 Crypto two-reg SHA
10792 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
10793 * +-----------------+------+-----------+--------+-----+------+------+
10794 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
10795 * +-----------------+------+-----------+--------+-----+------+------+
10797 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10799 int size = extract32(insn, 22, 2);
10800 int opcode = extract32(insn, 12, 5);
10801 int rn = extract32(insn, 5, 5);
10802 int rd = extract32(insn, 0, 5);
10803 CryptoTwoOpEnvFn *genfn;
10804 int feature;
10805 TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10807 if (size != 0) {
10808 unallocated_encoding(s);
10809 return;
10812 switch (opcode) {
10813 case 0: /* SHA1H */
10814 feature = ARM_FEATURE_V8_SHA1;
10815 genfn = gen_helper_crypto_sha1h;
10816 break;
10817 case 1: /* SHA1SU1 */
10818 feature = ARM_FEATURE_V8_SHA1;
10819 genfn = gen_helper_crypto_sha1su1;
10820 break;
10821 case 2: /* SHA256SU0 */
10822 feature = ARM_FEATURE_V8_SHA256;
10823 genfn = gen_helper_crypto_sha256su0;
10824 break;
10825 default:
10826 unallocated_encoding(s);
10827 return;
10830 if (!arm_dc_feature(s, feature)) {
10831 unallocated_encoding(s);
10832 return;
10835 tcg_rd_regno = tcg_const_i32(rd << 1);
10836 tcg_rn_regno = tcg_const_i32(rn << 1);
10838 genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10840 tcg_temp_free_i32(tcg_rd_regno);
10841 tcg_temp_free_i32(tcg_rn_regno);
10844 /* C3.6 Data processing - SIMD, inc Crypto
10846 * As the decode gets a little complex we are using a table based
10847 * approach for this part of the decode.
10849 static const AArch64DecodeTable data_proc_simd[] = {
10850 /* pattern , mask , fn */
10851 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10852 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10853 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10854 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10855 { 0x0e000400, 0x9fe08400, disas_simd_copy },
10856 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10857 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10858 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10859 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10860 { 0x0e000000, 0xbf208c00, disas_simd_tb },
10861 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10862 { 0x2e000000, 0xbf208400, disas_simd_ext },
10863 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10864 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10865 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10866 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10867 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10868 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10869 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
10870 { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10871 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10872 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10873 { 0x00000000, 0x00000000, NULL }
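/* Entries are matched by (insn & mask) == pattern; the all-zeroes entry
 * terminates the table, in which case lookup_disas_fn() returns NULL and
 * the insn is treated as unallocated.
 */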
10876 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10878 /* Note that this is called with all non-FP cases from
10879 * table C3-6 so it must UNDEF for entries not specifically
10880 * allocated to instructions in that table.
10882 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10883 if (fn) {
10884 fn(s, insn);
10885 } else {
10886 unallocated_encoding(s);
10890 /* C3.6 Data processing - SIMD and floating point */
10891 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10893 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10894 disas_data_proc_fp(s, insn);
10895 } else {
10896 /* SIMD, including crypto */
10897 disas_data_proc_simd(s, insn);
10901 /* C3.1 A64 instruction index by encoding */
10902 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10904 uint32_t insn;
10906 insn = arm_ldl_code(env, s->pc, s->bswap_code);
10907 s->insn = insn;
10908 s->pc += 4;
10910 s->fp_access_checked = false;
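    /* The top-level dispatch is on insn[28:25], which selects the major
     * encoding group.
     */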
10912 switch (extract32(insn, 25, 4)) {
10913 case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10914 unallocated_encoding(s);
10915 break;
10916 case 0x8: case 0x9: /* Data processing - immediate */
10917 disas_data_proc_imm(s, insn);
10918 break;
10919 case 0xa: case 0xb: /* Branch, exception generation and system insns */
10920 disas_b_exc_sys(s, insn);
10921 break;
10922 case 0x4:
10923 case 0x6:
10924 case 0xc:
10925 case 0xe: /* Loads and stores */
10926 disas_ldst(s, insn);
10927 break;
10928 case 0x5:
10929 case 0xd: /* Data processing - register */
10930 disas_data_proc_reg(s, insn);
10931 break;
10932 case 0x7:
10933 case 0xf: /* Data processing - SIMD and floating point */
10934 disas_data_proc_simd_fp(s, insn);
10935 break;
10936 default:
10937         assert(FALSE); /* all 16 cases should be handled above */
10938 break;
10941 /* if we allocated any temporaries, free them here */
10942 free_tmp_a64(s);
10945 void gen_intermediate_code_internal_a64(ARMCPU *cpu,
10946 TranslationBlock *tb,
10947 bool search_pc)
10949 CPUState *cs = CPU(cpu);
10950 CPUARMState *env = &cpu->env;
10951 DisasContext dc1, *dc = &dc1;
10952 CPUBreakpoint *bp;
10953 int j, lj;
10954 target_ulong pc_start;
10955 target_ulong next_page_start;
10956 int num_insns;
10957 int max_insns;
10959 pc_start = tb->pc;
10961 dc->tb = tb;
10963 dc->is_jmp = DISAS_NEXT;
10964 dc->pc = pc_start;
10965 dc->singlestep_enabled = cs->singlestep_enabled;
10966 dc->condjmp = 0;
10968 dc->aarch64 = 1;
10969 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10970 * there is no secure EL1, so we route exceptions to EL3.
10972 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10973 !arm_el_is_aa64(env, 3);
10974 dc->thumb = 0;
10975 dc->bswap_code = 0;
10976 dc->condexec_mask = 0;
10977 dc->condexec_cond = 0;
10978 dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
10979 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10980 #if !defined(CONFIG_USER_ONLY)
10981 dc->user = (dc->current_el == 0);
10982 #endif
10983 dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
10984 dc->vec_len = 0;
10985 dc->vec_stride = 0;
10986 dc->cp_regs = cpu->cp_regs;
10987 dc->features = env->features;
10989 /* Single step state. The code-generation logic here is:
10990 * SS_ACTIVE == 0:
10991 * generate code with no special handling for single-stepping (except
10992 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10993 * this happens anyway because those changes are all system register or
10994 * PSTATE writes).
10995 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10996 * emit code for one insn
10997 * emit code to clear PSTATE.SS
10998 * emit code to generate software step exception for completed step
10999 * end TB (as usual for having generated an exception)
11000 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11001 * emit code to generate a software step exception
11002 * end the TB
11004 dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11005 dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11006 dc->is_ldex = false;
11007 dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11009 init_tmp_a64_array(dc);
11011 next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11012 lj = -1;
11013 num_insns = 0;
11014 max_insns = tb->cflags & CF_COUNT_MASK;
11015 if (max_insns == 0) {
11016 max_insns = CF_COUNT_MASK;
11019 gen_tb_start(tb);
11021 tcg_clear_temp_count();
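    /* Main translation loop: emit code for one guest insn per iteration
     * until the block has to end (branch, singlestep, page boundary,
     * full op buffer or the insn count limit).
     */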
11023 do {
11024 if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11025 QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11026 if (bp->pc == dc->pc) {
11027 gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11028 /* Advance PC so that clearing the breakpoint will
11029 invalidate this TB. */
11030 dc->pc += 2;
11031 goto done_generating;
11036 if (search_pc) {
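            /* When retranslating to locate a guest PC (search_pc), record
             * pc and icount for each TCG op index so the caller can map a
             * host PC back to guest state.
             */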
11037 j = tcg_op_buf_count();
11038 if (lj < j) {
11039 lj++;
11040 while (lj < j) {
11041 tcg_ctx.gen_opc_instr_start[lj++] = 0;
11044 tcg_ctx.gen_opc_pc[lj] = dc->pc;
11045 tcg_ctx.gen_opc_instr_start[lj] = 1;
11046 tcg_ctx.gen_opc_icount[lj] = num_insns;
11049 if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
11050 gen_io_start();
11053 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
11054 tcg_gen_debug_insn_start(dc->pc);
11057 if (dc->ss_active && !dc->pstate_ss) {
11058 /* Singlestep state is Active-pending.
11059 * If we're in this state at the start of a TB then either
11060 * a) we just took an exception to an EL which is being debugged
11061 * and this is the first insn in the exception handler
11062 * b) debug exceptions were masked and we just unmasked them
11063 * without changing EL (eg by clearing PSTATE.D)
11064 * In either case we're going to take a swstep exception in the
11065 * "did not step an insn" case, and so the syndrome ISV and EX
11066 * bits should be zero.
11068 assert(num_insns == 0);
11069 gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11070 default_exception_el(dc));
11071 dc->is_jmp = DISAS_EXC;
11072 break;
11075 disas_a64_insn(env, dc);
11077 if (tcg_check_temp_count()) {
11078 fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11079 dc->pc);
11082         /* Translation stops when a branch or other block-ending insn is
11083          * encountered; otherwise the subsequent code could get translated
11084          * several times.  Also stop translation when a page boundary is
11085          * reached, which ensures prefetch aborts occur at the right place.
11087 num_insns++;
11088 } while (!dc->is_jmp && !tcg_op_buf_full() &&
11089 !cs->singlestep_enabled &&
11090 !singlestep &&
11091 !dc->ss_active &&
11092 dc->pc < next_page_start &&
11093 num_insns < max_insns);
11095 if (tb->cflags & CF_LAST_IO) {
11096 gen_io_end();
11099 if (unlikely(cs->singlestep_enabled || dc->ss_active)
11100 && dc->is_jmp != DISAS_EXC) {
11101 /* Note that this means single stepping WFI doesn't halt the CPU.
11102 * For conditional branch insns this is harmless unreachable code as
11103 * gen_goto_tb() has already handled emitting the debug exception
11104 * (and thus a tb-jump is not possible when singlestepping).
11106 assert(dc->is_jmp != DISAS_TB_JUMP);
11107 if (dc->is_jmp != DISAS_JUMP) {
11108 gen_a64_set_pc_im(dc->pc);
11110 if (cs->singlestep_enabled) {
11111 gen_exception_internal(EXCP_DEBUG);
11112 } else {
11113 gen_step_complete_exception(dc);
11115 } else {
11116 switch (dc->is_jmp) {
11117 case DISAS_NEXT:
11118 gen_goto_tb(dc, 1, dc->pc);
11119 break;
11120 default:
11121 case DISAS_UPDATE:
11122 gen_a64_set_pc_im(dc->pc);
11123 /* fall through */
11124 case DISAS_JUMP:
11125 /* indicate that the hash table must be used to find the next TB */
11126 tcg_gen_exit_tb(0);
11127 break;
11128 case DISAS_TB_JUMP:
11129 case DISAS_EXC:
11130 case DISAS_SWI:
11131 break;
11132 case DISAS_WFE:
11133 gen_a64_set_pc_im(dc->pc);
11134 gen_helper_wfe(cpu_env);
11135 break;
11136 case DISAS_YIELD:
11137 gen_a64_set_pc_im(dc->pc);
11138 gen_helper_yield(cpu_env);
11139 break;
11140 case DISAS_WFI:
11141 /* This is a special case because we don't want to just halt the CPU
11142 * if trying to debug across a WFI.
11144 gen_a64_set_pc_im(dc->pc);
11145 gen_helper_wfi(cpu_env);
11146 /* The helper doesn't necessarily throw an exception, but we
11147 * must go back to the main loop to check for interrupts anyway.
11149 tcg_gen_exit_tb(0);
11150 break;
11154 done_generating:
11155 gen_tb_end(tb, num_insns);
11157 #ifdef DEBUG_DISAS
11158 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11159 qemu_log("----------------\n");
11160 qemu_log("IN: %s\n", lookup_symbol(pc_start));
11161 log_target_disas(cs, pc_start, dc->pc - pc_start,
11162 4 | (dc->bswap_code << 1));
11163 qemu_log("\n");
11165 #endif
11166 if (search_pc) {
11167 j = tcg_op_buf_count();
11168 lj++;
11169 while (lj <= j) {
11170 tcg_ctx.gen_opc_instr_start[lj++] = 0;
11172 } else {
11173 tb->size = dc->pc - pc_start;
11174 tb->icount = num_insns;