target/arm: Convert Neon 'load single structure to all lanes' to decodetree
[qemu/ar7.git] / target/arm/translate.c
blob 7099274c92ab4ed5bf6803d5ecccb07e5c193ce6
1 /*
2 * ARM translation
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
38 #include "exec/log.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
54 #include "translate.h"
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 #include "exec/gen-icount.h"
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
 78 /* Function prototypes for gen_ functions for fixed point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
84 int i;
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
101 a64_translate_init();
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
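/*
 * Illustrative use (not from the original source): a 16-bit Thumb load
 * into r3 that wants ISS reporting would pass issinfo == (3 | ISSIs16Bit);
 * disas_set_da_iss() below then recovers srt == 3 from the low five bits
 * and the flag bits from bit 5 upwards.
 */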
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
131 return;
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
139 return;
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
147 static inline int get_a32_user_mem_index(DisasContext *s)
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
182 static inline TCGv_i32 load_cpu_offset(int offset)
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
203 return s->pc_curr + (s->thumb ? 4 : 8);
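/*
 * Illustrative example: an ARM-state instruction at 0x00001000 reads the
 * PC as 0x00001008 (pc_curr + 8); the same instruction in Thumb state
 * would read 0x00001004, matching the architectural pipeline offset.
 */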
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
231 TCGv_i32 tmp = tcg_temp_new_i32();
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
238 return tmp;
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
271 #endif
272 store_reg(s, 13, var);
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
294 static void gen_exception_internal(int excp)
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
303 static void gen_step_complete_exception(DisasContext *s)
 305     /* We just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
319 static void gen_singlestep_exception(DisasContext *s)
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
332 static inline bool is_singlestepping(DisasContext *s)
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
340 return s->base.singlestep_enabled || s->ss_active;
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
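/*
 * Worked example: applied to 0xAABBCCDD, the masking and shifting above
 * swaps the bytes within each halfword independently, giving 0xBBAADDCC
 * rather than the full byte reversal 0xDDCCBBAA.
 */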
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
396 return ret;
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
414 return ret;
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
420 tcg_gen_rotri_i32(var, var, 16);
 423 /* Dual 16-bit add.  Result placed in dest; t0 and t1 are clobbered.
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
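/*
 * Worked example of the carry-suppression trick: with t0 = t1 = 0x00018000
 * a plain 32-bit add would give 0x00030000, letting the low halfword's
 * carry leak into the high halfword. Clearing bit 15 of both operands
 * before the add and XOR-ing tmp back in afterwards yields 0x00020000,
 * i.e. 0x8000 + 0x8000 = 0x0000 and 0x0001 + 0x0001 = 0x0002 computed
 * independently per lane.
 */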
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
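/*
 * This relies on the identity T0 - T1 - (1 - CF) == T0 + ~T1 + CF, which
 * lets SBC share the flag computation of gen_adc_CC() above; e.g. with
 * CF == 1 it reduces to an ordinary subtraction T0 - T1.
 */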
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
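/*
 * Note (illustrative summary of the register-specified shifts above):
 * only the low byte of t1 is consulted; LSL/LSR by 32 or more produce 0,
 * while SAR clamps the count to 31, so shifting 0x80000000 right
 * arithmetically by 40 still yields 0xFFFFFFFF.
 */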
561 static void shifter_out_im(TCGv_i32 var, int shift)
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
641 tcg_temp_free_i32(shift);
645 * Generate a conditional based on ARM condition code cc.
 646  * This is common between ARM and AArch64 targets.
648 void arm_test_cc(DisasCompare *cmp, int cc)
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
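/*
 * Worked example (illustration): for cc == 9 ("ls", !C || Z) the code
 * above builds value = (-CF) & ZF with TCG_COND_NE, then inverts to
 * TCG_COND_EQ because cc is odd; the comparison against zero therefore
 * succeeds exactly when CF == 0 or ZF == 0, i.e. when C is clear or Z
 * is set.
 */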
734 void arm_free_cc(DisasCompare *cmp)
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
746 void arm_gen_test_cc(int cc, TCGLabel *label)
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
754 static inline void gen_set_condexec(DisasContext *s)
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
766 tcg_gen_movi_i32(cpu_R[15], val);
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
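/*
 * e.g. a BX to 0x00008001 writes 0x00008000 to the PC above and copies
 * the stripped bit 0 into env->thumb, switching to Thumb state.
 */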
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
811 #endif
814 static inline void gen_bx_excret_final_code(DisasContext *s)
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
852 static inline void gen_bxns(DisasContext *s, int rm)
854 TCGv_i32 var = load_reg(s, rm);
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
871 static inline void gen_blxns(DisasContext *s, int rm)
873 TCGv_i32 var = load_reg(s, rm);
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
933 return addr;
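/*
 * Example of the BE32 address munging above: with SCTLR.B set, a byte
 * access to guest address 0x1001 is XOR-ed with (4 - 1) and performed at
 * 0x1002, and a halfword access to 0x1002 is XOR-ed with (4 - 2) and
 * performed at 0x1000, reproducing the word-invariant big-endian layout.
 */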
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
939 TCGv addr;
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
954 TCGv addr;
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
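/*
 * For illustration: under BE32 (SCTLR.B) the two 32-bit words of a
 * doubleword sit in the opposite order from a genuine big-endian 64-bit
 * value, so the rotate by 32 above swaps the halves back after the load.
 */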
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1017 tcg_temp_free(addr);
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1051 static inline void gen_smc(DisasContext *s)
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1056 TCGv_i32 tmp;
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1085 TCGv_i32 tcg_syn;
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1095 static void unallocated_encoding(DisasContext *s)
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1109 static inline void gen_hlt(DisasContext *s, int imm)
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113      * Since QEMU doesn't implement external debug, we treat this as
1114      * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1132 unallocated_encoding(s);
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1159 return ofs;
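/*
 * Illustrative mapping: single-precision S5 (reg == 5) resolves to
 * zregs[1].d[0], i.e. D2, plus the l.upper word, matching the
 * architectural overlay in which S4 and S5 occupy the two halves of D2.
 */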
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
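/*
 * Worked example of the big-endian fixup above: on a big-endian host,
 * byte element 0 (MO_8) gets ofs = 0 ^ (8 - 1) = 7, so the least
 * significant element is read from the highest byte offset within the
 * 8-byte unit, where such a host actually stores it.
 */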
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1248 long offset = neon_element_offset(reg, ele, size);
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1267 long offset = neon_element_offset(reg, ele, size);
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1314 #define ARM_CP_RW_BIT (1 << 20)
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1345 iwmmxt_store_reg(cpu_M0, rn);
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1350 iwmmxt_load_reg(cpu_M0, rn);
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1451 static void gen_op_iwmmxt_set_mup(void)
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459 static void gen_op_iwmmxt_set_cup(void)
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
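/*
 * Illustrative decode of the addressing form above:
 * offset = (insn & 0xff) << ((insn >> 7) & 2), so an 8-bit immediate of
 * 0x10 with insn bit 8 set is scaled by 4 to 0x40; bit 24 selects
 * pre-indexing, bit 23 add vs. subtract, and bit 21 writeback (or
 * post-indexing when bit 24 is clear).
 */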
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1561 return 0;
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1623 tcg_temp_free_i32(tmp);
1625 tcg_temp_free_i32(addr);
1626 return 0;
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2540 return 0;
2543 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2544    (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2556 if (acc != 0)
2557 return 1;
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2594 if (acc != 0)
2595 return 1;
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2606 return 0;
2609 return 1;
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
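/*
 * Worked example (illustrative values, not a specific encoding from
 * this file): with Vd in bits [15:12] and D in bit 22, insn = 0x00401000
 * has Vd = 1 and D = 1, so on a core with the 32-register SIMD file
 * VFP_DREG_D yields
 *     reg = ((insn >> 12) & 0x0f) | ((insn >> 18) & 0x10)
 *         = 0x01 | 0x10 = 17            (i.e. d17)
 * while a core without aa32_simd_r32 takes the "return 1" (UNDEF) path
 * because the D bit is set.
 */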
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
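/*
 * Example of the duplication performed above (values are illustrative):
 * gen_neon_dup_low16 turns 0x1234abcd into 0xabcdabcd (low halfword
 * replicated), while gen_neon_dup_high16 turns it into 0x12341234
 * (high halfword replicated), as used when splatting a 16-bit scalar
 * operand (see neon_get_scalar() below).
 */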
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2655 static void gen_goto_ptr(void)
2657 tcg_gen_lookup_and_goto_ptr();
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2674 s->base.is_jmp = DISAS_NORETURN;
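/*
 * In other words: if the destination is on the same guest page as the
 * current TB (see use_goto_tb() above), a direct tcg_gen_goto_tb()
 * chain is emitted so the blocks can be linked; otherwise we fall back
 * to tcg_gen_lookup_and_goto_ptr() via gen_goto_ptr().  gen_jmp()
 * below is the typical caller.
 */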
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
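/*
 * The x/y flags select which halfword of each operand feeds the
 * multiply, as in the SMULxy family.  For example gen_mulxy(t0, t1, 1, 0)
 * computes (t0 >> 16) * sext16(t1), i.e. the "top x bottom" (TB) form,
 * with both halves sign-extended to 32 bits.
 */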
2701 /* Return the mask of PSR bits set by a MSR instruction. */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2704 uint32_t mask = 0;
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2731 return mask;
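/*
 * Worked example: flags = 0b1001 (the "fc" field specifier, i.e. the
 * flags and control bytes) gives a raw mask of 0xff0000ff.  That mask
 * is then reduced by aarch32_cpsr_valid_mask(), has the execution
 * state bits removed for CPSR (non-SPSR) writes, and is limited to
 * CPSR_USER when translating user-mode code.
 */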
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2787 int exc_target = default_exception_el(s);
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2886 exc_target = 3;
2887 goto undef;
2889 break;
2890 case ARM_CPU_MODE_HYP:
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2901 break;
2902 default:
2903 break;
2906 return true;
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3014 /* 32-bit pairwise ops end up the same as the elementwise versions. */
3015 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
3016 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
3017 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
3018 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
3020 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3021 switch ((size << 1) | u) { \
3022 case 0: \
3023 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 1: \
3026 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 2: \
3029 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 3: \
3032 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 case 4: \
3035 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3036 break; \
3037 case 5: \
3038 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3039 break; \
3040 default: return 1; \
3041 }} while (0)
3043 #define GEN_NEON_INTEGER_OP(name) do { \
3044 switch ((size << 1) | u) { \
3045 case 0: \
3046 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3047 break; \
3048 case 1: \
3049 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3050 break; \
3051 case 2: \
3052 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3053 break; \
3054 case 3: \
3055 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3056 break; \
3057 case 4: \
3058 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3059 break; \
3060 case 5: \
3061 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3062 break; \
3063 default: return 1; \
3064 }} while (0)
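/*
 * Both macros above dispatch on ((size << 1) | u), so even case numbers
 * are the signed variants and odd ones the unsigned variants of the
 * same element size.  For instance, with size = 1 and u = 0,
 * GEN_NEON_INTEGER_OP(rhadd) expands to
 *     gen_helper_neon_rhadd_s16(tmp, tmp, tmp2);
 * (helper names assumed to follow the gen_helper_neon_<op>_<type>
 * pattern used throughout this file).
 */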
3066 static TCGv_i32 neon_load_scratch(int scratch)
3068 TCGv_i32 tmp = tcg_temp_new_i32();
3069 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3070 return tmp;
3073 static void neon_store_scratch(int scratch, TCGv_i32 var)
3075 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3076 tcg_temp_free_i32(var);
3079 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3081 TCGv_i32 tmp;
3082 if (size == 1) {
3083 tmp = neon_load_reg(reg & 7, reg >> 4);
3084 if (reg & 8) {
3085 gen_neon_dup_high16(tmp);
3086 } else {
3087 gen_neon_dup_low16(tmp);
3089 } else {
3090 tmp = neon_load_reg(reg & 15, reg >> 4);
3092 return tmp;
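/*
 * Example of the scalar decode above (reg is the packed scalar index
 * used by the Neon "by scalar" insns): with size == 1 and reg == 13
 * (0b1101), the low 32 bits of d5 are loaded (reg & 7 = 5,
 * reg >> 4 = 0) and, because bit 3 is set, the high halfword is
 * duplicated across the word with gen_neon_dup_high16().
 */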
3095 static int gen_neon_unzip(int rd, int rm, int size, int q)
3097 TCGv_ptr pd, pm;
3099 if (!q && size == 2) {
3100 return 1;
3102 pd = vfp_reg_ptr(true, rd);
3103 pm = vfp_reg_ptr(true, rm);
3104 if (q) {
3105 switch (size) {
3106 case 0:
3107 gen_helper_neon_qunzip8(pd, pm);
3108 break;
3109 case 1:
3110 gen_helper_neon_qunzip16(pd, pm);
3111 break;
3112 case 2:
3113 gen_helper_neon_qunzip32(pd, pm);
3114 break;
3115 default:
3116 abort();
3118 } else {
3119 switch (size) {
3120 case 0:
3121 gen_helper_neon_unzip8(pd, pm);
3122 break;
3123 case 1:
3124 gen_helper_neon_unzip16(pd, pm);
3125 break;
3126 default:
3127 abort();
3130 tcg_temp_free_ptr(pd);
3131 tcg_temp_free_ptr(pm);
3132 return 0;
3135 static int gen_neon_zip(int rd, int rm, int size, int q)
3137 TCGv_ptr pd, pm;
3139 if (!q && size == 2) {
3140 return 1;
3142 pd = vfp_reg_ptr(true, rd);
3143 pm = vfp_reg_ptr(true, rm);
3144 if (q) {
3145 switch (size) {
3146 case 0:
3147 gen_helper_neon_qzip8(pd, pm);
3148 break;
3149 case 1:
3150 gen_helper_neon_qzip16(pd, pm);
3151 break;
3152 case 2:
3153 gen_helper_neon_qzip32(pd, pm);
3154 break;
3155 default:
3156 abort();
3158 } else {
3159 switch (size) {
3160 case 0:
3161 gen_helper_neon_zip8(pd, pm);
3162 break;
3163 case 1:
3164 gen_helper_neon_zip16(pd, pm);
3165 break;
3166 default:
3167 abort();
3170 tcg_temp_free_ptr(pd);
3171 tcg_temp_free_ptr(pm);
3172 return 0;
3175 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3177 TCGv_i32 rd, tmp;
3179 rd = tcg_temp_new_i32();
3180 tmp = tcg_temp_new_i32();
3182 tcg_gen_shli_i32(rd, t0, 8);
3183 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3184 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3185 tcg_gen_or_i32(rd, rd, tmp);
3187 tcg_gen_shri_i32(t1, t1, 8);
3188 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3189 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3190 tcg_gen_or_i32(t1, t1, tmp);
3191 tcg_gen_mov_i32(t0, rd);
3193 tcg_temp_free_i32(tmp);
3194 tcg_temp_free_i32(rd);
3197 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3199 TCGv_i32 rd, tmp;
3201 rd = tcg_temp_new_i32();
3202 tmp = tcg_temp_new_i32();
3204 tcg_gen_shli_i32(rd, t0, 16);
3205 tcg_gen_andi_i32(tmp, t1, 0xffff);
3206 tcg_gen_or_i32(rd, rd, tmp);
3207 tcg_gen_shri_i32(t1, t1, 16);
3208 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3209 tcg_gen_or_i32(t1, t1, tmp);
3210 tcg_gen_mov_i32(t0, rd);
3212 tcg_temp_free_i32(tmp);
3213 tcg_temp_free_i32(rd);
3217 /* Translate a NEON load/store element instruction. Return nonzero if the
3218 instruction is invalid. */
3219 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
3221 int rd, rn, rm;
3222 int nregs;
3223 int stride;
3224 int size;
3225 int reg;
3226 int load;
3227 TCGv_i32 addr;
3228 TCGv_i32 tmp;
3230 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3231 return 1;
3234 /* FIXME: this access check should not take precedence over UNDEF
3235 * for invalid encodings; we will generate incorrect syndrome information
3236 * for attempts to execute invalid vfp/neon encodings with FP disabled.
3238 if (s->fp_excp_el) {
3239 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
3240 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
3241 return 0;
3244 if (!s->vfp_enabled)
3245 return 1;
3246 VFP_DREG_D(rd, insn);
3247 rn = (insn >> 16) & 0xf;
3248 rm = insn & 0xf;
3249 load = (insn & (1 << 21)) != 0;
3250 if ((insn & (1 << 23)) == 0) {
3251 /* Load store all elements -- handled already by decodetree */
3252 return 1;
3253 } else {
3254 size = (insn >> 10) & 3;
3255 if (size == 3) {
3256 /* Load single element to all lanes -- handled by decodetree */
3257 return 1;
3258 } else {
3259 /* Single element. */
3260 int idx = (insn >> 4) & 0xf;
3261 int reg_idx;
3262 switch (size) {
3263 case 0:
3264 reg_idx = (insn >> 5) & 7;
3265 stride = 1;
3266 break;
3267 case 1:
3268 reg_idx = (insn >> 6) & 3;
3269 stride = (insn & (1 << 5)) ? 2 : 1;
3270 break;
3271 case 2:
3272 reg_idx = (insn >> 7) & 1;
3273 stride = (insn & (1 << 6)) ? 2 : 1;
3274 break;
3275 default:
3276 abort();
3278 nregs = ((insn >> 8) & 3) + 1;
3279 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
3280 switch (nregs) {
3281 case 1:
3282 if (((idx & (1 << size)) != 0) ||
3283 (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
3284 return 1;
3286 break;
3287 case 3:
3288 if ((idx & 1) != 0) {
3289 return 1;
3291 /* fall through */
3292 case 2:
3293 if (size == 2 && (idx & 2) != 0) {
3294 return 1;
3296 break;
3297 case 4:
3298 if ((size == 2) && ((idx & 3) == 3)) {
3299 return 1;
3301 break;
3302 default:
3303 abort();
3305 if ((rd + stride * (nregs - 1)) > 31) {
3306 /* Attempts to write off the end of the register file
3307              * the element load/store below would run off the end of the array.
3308 * the neon_load_reg() would write off the end of the array.
3310 return 1;
3312 tmp = tcg_temp_new_i32();
3313 addr = tcg_temp_new_i32();
3314 load_reg_var(s, addr, rn);
3315 for (reg = 0; reg < nregs; reg++) {
3316 if (load) {
3317 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3318 s->be_data | size);
3319 neon_store_element(rd, reg_idx, size, tmp);
3320 } else { /* Store */
3321 neon_load_element(tmp, rd, reg_idx, size);
3322 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
3323 s->be_data | size);
3325 rd += stride;
3326 tcg_gen_addi_i32(addr, addr, 1 << size);
3328 tcg_temp_free_i32(addr);
3329 tcg_temp_free_i32(tmp);
3330 stride = nregs * (1 << size);
3333 if (rm != 15) {
3334 TCGv_i32 base;
3336 base = load_reg(s, rn);
3337 if (rm == 13) {
3338 tcg_gen_addi_i32(base, base, stride);
3339 } else {
3340 TCGv_i32 index;
3341 index = load_reg(s, rm);
3342 tcg_gen_add_i32(base, base, index);
3343 tcg_temp_free_i32(index);
3345 store_reg(s, rn, base);
3347 return 0;
3350 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3352 switch (size) {
3353 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3354 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3355 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3356 default: abort();
3360 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3362 switch (size) {
3363 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3364 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3365 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3366 default: abort();
3370 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3372 switch (size) {
3373 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3374 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3375 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3376 default: abort();
3380 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3382 switch (size) {
3383 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3384 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3385 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3386 default: abort();
3390 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3391 int q, int u)
3393 if (q) {
3394 if (u) {
3395 switch (size) {
3396 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3397 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3398 default: abort();
3400 } else {
3401 switch (size) {
3402 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3403 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3404 default: abort();
3407 } else {
3408 if (u) {
3409 switch (size) {
3410 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3411 case 2: gen_ushl_i32(var, var, shift); break;
3412 default: abort();
3414 } else {
3415 switch (size) {
3416 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3417 case 2: gen_sshl_i32(var, var, shift); break;
3418 default: abort();
3424 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3426 if (u) {
3427 switch (size) {
3428 case 0: gen_helper_neon_widen_u8(dest, src); break;
3429 case 1: gen_helper_neon_widen_u16(dest, src); break;
3430 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3431 default: abort();
3433 } else {
3434 switch (size) {
3435 case 0: gen_helper_neon_widen_s8(dest, src); break;
3436 case 1: gen_helper_neon_widen_s16(dest, src); break;
3437 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3438 default: abort();
3441 tcg_temp_free_i32(src);
3444 static inline void gen_neon_addl(int size)
3446 switch (size) {
3447 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3448 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3449 case 2: tcg_gen_add_i64(CPU_V001); break;
3450 default: abort();
3454 static inline void gen_neon_subl(int size)
3456 switch (size) {
3457 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3458 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3459 case 2: tcg_gen_sub_i64(CPU_V001); break;
3460 default: abort();
3464 static inline void gen_neon_negl(TCGv_i64 var, int size)
3466 switch (size) {
3467 case 0: gen_helper_neon_negl_u16(var, var); break;
3468 case 1: gen_helper_neon_negl_u32(var, var); break;
3469 case 2:
3470 tcg_gen_neg_i64(var, var);
3471 break;
3472 default: abort();
3476 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3478 switch (size) {
3479 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3480 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3481 default: abort();
3485 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3486 int size, int u)
3488 TCGv_i64 tmp;
3490 switch ((size << 1) | u) {
3491 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3492 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3493 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3494 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3495 case 4:
3496 tmp = gen_muls_i64_i32(a, b);
3497 tcg_gen_mov_i64(dest, tmp);
3498 tcg_temp_free_i64(tmp);
3499 break;
3500 case 5:
3501 tmp = gen_mulu_i64_i32(a, b);
3502 tcg_gen_mov_i64(dest, tmp);
3503 tcg_temp_free_i64(tmp);
3504 break;
3505 default: abort();
3508     /* gen_helper_neon_mull_[su]{8|16} do not free their parameters,
3509        so they must be freed here. */
3510 if (size < 2) {
3511 tcg_temp_free_i32(a);
3512 tcg_temp_free_i32(b);
3516 static void gen_neon_narrow_op(int op, int u, int size,
3517 TCGv_i32 dest, TCGv_i64 src)
3519 if (op) {
3520 if (u) {
3521 gen_neon_unarrow_sats(size, dest, src);
3522 } else {
3523 gen_neon_narrow(size, dest, src);
3525 } else {
3526 if (u) {
3527 gen_neon_narrow_satu(size, dest, src);
3528 } else {
3529 gen_neon_narrow_sats(size, dest, src);
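/*
 * The op/u flags map onto the narrowing insn flavours roughly as
 * follows: op=1,u=0 is the plain narrow (VMOVN), op=0 selects the
 * saturating narrows (signed for u=0, unsigned for u=1, i.e. VQMOVN),
 * and op=1,u=1 is the signed-to-unsigned saturating narrow (VQMOVUN).
 */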
3534 /* Symbolic constants for op fields for Neon 3-register same-length.
3535 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3536 * table A7-9.
3538 #define NEON_3R_VHADD 0
3539 #define NEON_3R_VQADD 1
3540 #define NEON_3R_VRHADD 2
3541 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3542 #define NEON_3R_VHSUB 4
3543 #define NEON_3R_VQSUB 5
3544 #define NEON_3R_VCGT 6
3545 #define NEON_3R_VCGE 7
3546 #define NEON_3R_VSHL 8
3547 #define NEON_3R_VQSHL 9
3548 #define NEON_3R_VRSHL 10
3549 #define NEON_3R_VQRSHL 11
3550 #define NEON_3R_VMAX 12
3551 #define NEON_3R_VMIN 13
3552 #define NEON_3R_VABD 14
3553 #define NEON_3R_VABA 15
3554 #define NEON_3R_VADD_VSUB 16
3555 #define NEON_3R_VTST_VCEQ 17
3556 #define NEON_3R_VML 18 /* VMLA, VMLS */
3557 #define NEON_3R_VMUL 19
3558 #define NEON_3R_VPMAX 20
3559 #define NEON_3R_VPMIN 21
3560 #define NEON_3R_VQDMULH_VQRDMULH 22
3561 #define NEON_3R_VPADD_VQRDMLAH 23
3562 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3563 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3564 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3565 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3566 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3567 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3568 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3569 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3571 static const uint8_t neon_3r_sizes[] = {
3572 [NEON_3R_VHADD] = 0x7,
3573 [NEON_3R_VQADD] = 0xf,
3574 [NEON_3R_VRHADD] = 0x7,
3575 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3576 [NEON_3R_VHSUB] = 0x7,
3577 [NEON_3R_VQSUB] = 0xf,
3578 [NEON_3R_VCGT] = 0x7,
3579 [NEON_3R_VCGE] = 0x7,
3580 [NEON_3R_VSHL] = 0xf,
3581 [NEON_3R_VQSHL] = 0xf,
3582 [NEON_3R_VRSHL] = 0xf,
3583 [NEON_3R_VQRSHL] = 0xf,
3584 [NEON_3R_VMAX] = 0x7,
3585 [NEON_3R_VMIN] = 0x7,
3586 [NEON_3R_VABD] = 0x7,
3587 [NEON_3R_VABA] = 0x7,
3588 [NEON_3R_VADD_VSUB] = 0xf,
3589 [NEON_3R_VTST_VCEQ] = 0x7,
3590 [NEON_3R_VML] = 0x7,
3591 [NEON_3R_VMUL] = 0x7,
3592 [NEON_3R_VPMAX] = 0x7,
3593 [NEON_3R_VPMIN] = 0x7,
3594 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3595 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3596 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3597 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3598 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3599 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3600 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3601 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3602 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3603 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
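/*
 * The table above is consulted as a per-size allow mask.  A minimal
 * sketch of the check the 3-reg-same decoder performs (assuming op and
 * size have already been extracted from the insn):
 *
 *     if (((neon_3r_sizes[op] >> size) & 1) == 0) {
 *         return 1;   // UNDEF: this size is not valid for this op
 *     }
 *
 * so, for example, NEON_3R_VQDMULH_VQRDMULH (0x6) accepts only
 * sizes 1 and 2.
 */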
3606 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3607 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3608 * table A7-13.
3610 #define NEON_2RM_VREV64 0
3611 #define NEON_2RM_VREV32 1
3612 #define NEON_2RM_VREV16 2
3613 #define NEON_2RM_VPADDL 4
3614 #define NEON_2RM_VPADDL_U 5
3615 #define NEON_2RM_AESE 6 /* Includes AESD */
3616 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3617 #define NEON_2RM_VCLS 8
3618 #define NEON_2RM_VCLZ 9
3619 #define NEON_2RM_VCNT 10
3620 #define NEON_2RM_VMVN 11
3621 #define NEON_2RM_VPADAL 12
3622 #define NEON_2RM_VPADAL_U 13
3623 #define NEON_2RM_VQABS 14
3624 #define NEON_2RM_VQNEG 15
3625 #define NEON_2RM_VCGT0 16
3626 #define NEON_2RM_VCGE0 17
3627 #define NEON_2RM_VCEQ0 18
3628 #define NEON_2RM_VCLE0 19
3629 #define NEON_2RM_VCLT0 20
3630 #define NEON_2RM_SHA1H 21
3631 #define NEON_2RM_VABS 22
3632 #define NEON_2RM_VNEG 23
3633 #define NEON_2RM_VCGT0_F 24
3634 #define NEON_2RM_VCGE0_F 25
3635 #define NEON_2RM_VCEQ0_F 26
3636 #define NEON_2RM_VCLE0_F 27
3637 #define NEON_2RM_VCLT0_F 28
3638 #define NEON_2RM_VABS_F 30
3639 #define NEON_2RM_VNEG_F 31
3640 #define NEON_2RM_VSWP 32
3641 #define NEON_2RM_VTRN 33
3642 #define NEON_2RM_VUZP 34
3643 #define NEON_2RM_VZIP 35
3644 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3645 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3646 #define NEON_2RM_VSHLL 38
3647 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3648 #define NEON_2RM_VRINTN 40
3649 #define NEON_2RM_VRINTX 41
3650 #define NEON_2RM_VRINTA 42
3651 #define NEON_2RM_VRINTZ 43
3652 #define NEON_2RM_VCVT_F16_F32 44
3653 #define NEON_2RM_VRINTM 45
3654 #define NEON_2RM_VCVT_F32_F16 46
3655 #define NEON_2RM_VRINTP 47
3656 #define NEON_2RM_VCVTAU 48
3657 #define NEON_2RM_VCVTAS 49
3658 #define NEON_2RM_VCVTNU 50
3659 #define NEON_2RM_VCVTNS 51
3660 #define NEON_2RM_VCVTPU 52
3661 #define NEON_2RM_VCVTPS 53
3662 #define NEON_2RM_VCVTMU 54
3663 #define NEON_2RM_VCVTMS 55
3664 #define NEON_2RM_VRECPE 56
3665 #define NEON_2RM_VRSQRTE 57
3666 #define NEON_2RM_VRECPE_F 58
3667 #define NEON_2RM_VRSQRTE_F 59
3668 #define NEON_2RM_VCVT_FS 60
3669 #define NEON_2RM_VCVT_FU 61
3670 #define NEON_2RM_VCVT_SF 62
3671 #define NEON_2RM_VCVT_UF 63
3673 static bool neon_2rm_is_v8_op(int op)
3675 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3676 switch (op) {
3677 case NEON_2RM_VRINTN:
3678 case NEON_2RM_VRINTA:
3679 case NEON_2RM_VRINTM:
3680 case NEON_2RM_VRINTP:
3681 case NEON_2RM_VRINTZ:
3682 case NEON_2RM_VRINTX:
3683 case NEON_2RM_VCVTAU:
3684 case NEON_2RM_VCVTAS:
3685 case NEON_2RM_VCVTNU:
3686 case NEON_2RM_VCVTNS:
3687 case NEON_2RM_VCVTPU:
3688 case NEON_2RM_VCVTPS:
3689 case NEON_2RM_VCVTMU:
3690 case NEON_2RM_VCVTMS:
3691 return true;
3692 default:
3693 return false;
3697 /* Each entry in this array has bit n set if the insn allows
3698 * size value n (otherwise it will UNDEF). Since unallocated
3699 * op values will have no bits set they always UNDEF.
3701 static const uint8_t neon_2rm_sizes[] = {
3702 [NEON_2RM_VREV64] = 0x7,
3703 [NEON_2RM_VREV32] = 0x3,
3704 [NEON_2RM_VREV16] = 0x1,
3705 [NEON_2RM_VPADDL] = 0x7,
3706 [NEON_2RM_VPADDL_U] = 0x7,
3707 [NEON_2RM_AESE] = 0x1,
3708 [NEON_2RM_AESMC] = 0x1,
3709 [NEON_2RM_VCLS] = 0x7,
3710 [NEON_2RM_VCLZ] = 0x7,
3711 [NEON_2RM_VCNT] = 0x1,
3712 [NEON_2RM_VMVN] = 0x1,
3713 [NEON_2RM_VPADAL] = 0x7,
3714 [NEON_2RM_VPADAL_U] = 0x7,
3715 [NEON_2RM_VQABS] = 0x7,
3716 [NEON_2RM_VQNEG] = 0x7,
3717 [NEON_2RM_VCGT0] = 0x7,
3718 [NEON_2RM_VCGE0] = 0x7,
3719 [NEON_2RM_VCEQ0] = 0x7,
3720 [NEON_2RM_VCLE0] = 0x7,
3721 [NEON_2RM_VCLT0] = 0x7,
3722 [NEON_2RM_SHA1H] = 0x4,
3723 [NEON_2RM_VABS] = 0x7,
3724 [NEON_2RM_VNEG] = 0x7,
3725 [NEON_2RM_VCGT0_F] = 0x4,
3726 [NEON_2RM_VCGE0_F] = 0x4,
3727 [NEON_2RM_VCEQ0_F] = 0x4,
3728 [NEON_2RM_VCLE0_F] = 0x4,
3729 [NEON_2RM_VCLT0_F] = 0x4,
3730 [NEON_2RM_VABS_F] = 0x4,
3731 [NEON_2RM_VNEG_F] = 0x4,
3732 [NEON_2RM_VSWP] = 0x1,
3733 [NEON_2RM_VTRN] = 0x7,
3734 [NEON_2RM_VUZP] = 0x7,
3735 [NEON_2RM_VZIP] = 0x7,
3736 [NEON_2RM_VMOVN] = 0x7,
3737 [NEON_2RM_VQMOVN] = 0x7,
3738 [NEON_2RM_VSHLL] = 0x7,
3739 [NEON_2RM_SHA1SU1] = 0x4,
3740 [NEON_2RM_VRINTN] = 0x4,
3741 [NEON_2RM_VRINTX] = 0x4,
3742 [NEON_2RM_VRINTA] = 0x4,
3743 [NEON_2RM_VRINTZ] = 0x4,
3744 [NEON_2RM_VCVT_F16_F32] = 0x2,
3745 [NEON_2RM_VRINTM] = 0x4,
3746 [NEON_2RM_VCVT_F32_F16] = 0x2,
3747 [NEON_2RM_VRINTP] = 0x4,
3748 [NEON_2RM_VCVTAU] = 0x4,
3749 [NEON_2RM_VCVTAS] = 0x4,
3750 [NEON_2RM_VCVTNU] = 0x4,
3751 [NEON_2RM_VCVTNS] = 0x4,
3752 [NEON_2RM_VCVTPU] = 0x4,
3753 [NEON_2RM_VCVTPS] = 0x4,
3754 [NEON_2RM_VCVTMU] = 0x4,
3755 [NEON_2RM_VCVTMS] = 0x4,
3756 [NEON_2RM_VRECPE] = 0x4,
3757 [NEON_2RM_VRSQRTE] = 0x4,
3758 [NEON_2RM_VRECPE_F] = 0x4,
3759 [NEON_2RM_VRSQRTE_F] = 0x4,
3760 [NEON_2RM_VCVT_FS] = 0x4,
3761 [NEON_2RM_VCVT_FU] = 0x4,
3762 [NEON_2RM_VCVT_SF] = 0x4,
3763 [NEON_2RM_VCVT_UF] = 0x4,
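/*
 * As with neon_3r_sizes, unallocated op values have no bits set and
 * therefore always UNDEF.  For example [NEON_2RM_VCVT_F16_F32] = 0x2
 * means only size 1 is accepted, and [NEON_2RM_SHA1H] = 0x4 means
 * only size 2.
 */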
3767 /* Expand v8.1 simd helper. */
3768 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
3769 int q, int rd, int rn, int rm)
3771 if (dc_isar_feature(aa32_rdm, s)) {
3772 int opr_sz = (1 + q) * 8;
3773 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
3774 vfp_reg_offset(1, rn),
3775 vfp_reg_offset(1, rm), cpu_env,
3776 opr_sz, opr_sz, 0, fn);
3777 return 0;
3779 return 1;
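/*
 * A minimal usage sketch (gen_helper_gvec_qrdmlah_s16 is one of the
 * v8.1 RDM gvec helpers assumed to be declared elsewhere in the tree):
 *
 *     return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16, q, rd, rn, rm);
 *
 * The caller simply forwards q/rd/rn/rm; the aa32_rdm feature test
 * above decides between gvec expansion (return 0) and UNDEF (return 1).
 */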
3782 static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
3784 tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
3785 tcg_gen_neg_i32(d, d);
3788 static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a)
3790 tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0);
3791 tcg_gen_neg_i64(d, d);
3794 static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3796 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3797 tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero);
3798 tcg_temp_free_vec(zero);
3801 static const TCGOpcode vecop_list_cmp[] = {
3802 INDEX_op_cmp_vec, 0
3805 const GVecGen2 ceq0_op[4] = {
3806 { .fno = gen_helper_gvec_ceq0_b,
3807 .fniv = gen_ceq0_vec,
3808 .opt_opc = vecop_list_cmp,
3809 .vece = MO_8 },
3810 { .fno = gen_helper_gvec_ceq0_h,
3811 .fniv = gen_ceq0_vec,
3812 .opt_opc = vecop_list_cmp,
3813 .vece = MO_16 },
3814 { .fni4 = gen_ceq0_i32,
3815 .fniv = gen_ceq0_vec,
3816 .opt_opc = vecop_list_cmp,
3817 .vece = MO_32 },
3818 { .fni8 = gen_ceq0_i64,
3819 .fniv = gen_ceq0_vec,
3820 .opt_opc = vecop_list_cmp,
3821 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3822 .vece = MO_64 },
3825 static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a)
3827 tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0);
3828 tcg_gen_neg_i32(d, d);
3831 static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a)
3833 tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0);
3834 tcg_gen_neg_i64(d, d);
3837 static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3839 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3840 tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero);
3841 tcg_temp_free_vec(zero);
3844 const GVecGen2 cle0_op[4] = {
3845 { .fno = gen_helper_gvec_cle0_b,
3846 .fniv = gen_cle0_vec,
3847 .opt_opc = vecop_list_cmp,
3848 .vece = MO_8 },
3849 { .fno = gen_helper_gvec_cle0_h,
3850 .fniv = gen_cle0_vec,
3851 .opt_opc = vecop_list_cmp,
3852 .vece = MO_16 },
3853 { .fni4 = gen_cle0_i32,
3854 .fniv = gen_cle0_vec,
3855 .opt_opc = vecop_list_cmp,
3856 .vece = MO_32 },
3857 { .fni8 = gen_cle0_i64,
3858 .fniv = gen_cle0_vec,
3859 .opt_opc = vecop_list_cmp,
3860 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3861 .vece = MO_64 },
3864 static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a)
3866 tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0);
3867 tcg_gen_neg_i32(d, d);
3870 static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a)
3872 tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0);
3873 tcg_gen_neg_i64(d, d);
3876 static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3878 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3879 tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero);
3880 tcg_temp_free_vec(zero);
3883 const GVecGen2 cge0_op[4] = {
3884 { .fno = gen_helper_gvec_cge0_b,
3885 .fniv = gen_cge0_vec,
3886 .opt_opc = vecop_list_cmp,
3887 .vece = MO_8 },
3888 { .fno = gen_helper_gvec_cge0_h,
3889 .fniv = gen_cge0_vec,
3890 .opt_opc = vecop_list_cmp,
3891 .vece = MO_16 },
3892 { .fni4 = gen_cge0_i32,
3893 .fniv = gen_cge0_vec,
3894 .opt_opc = vecop_list_cmp,
3895 .vece = MO_32 },
3896 { .fni8 = gen_cge0_i64,
3897 .fniv = gen_cge0_vec,
3898 .opt_opc = vecop_list_cmp,
3899 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3900 .vece = MO_64 },
3903 static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a)
3905 tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0);
3906 tcg_gen_neg_i32(d, d);
3909 static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a)
3911 tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0);
3912 tcg_gen_neg_i64(d, d);
3915 static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3917 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3918 tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero);
3919 tcg_temp_free_vec(zero);
3922 const GVecGen2 clt0_op[4] = {
3923 { .fno = gen_helper_gvec_clt0_b,
3924 .fniv = gen_clt0_vec,
3925 .opt_opc = vecop_list_cmp,
3926 .vece = MO_8 },
3927 { .fno = gen_helper_gvec_clt0_h,
3928 .fniv = gen_clt0_vec,
3929 .opt_opc = vecop_list_cmp,
3930 .vece = MO_16 },
3931 { .fni4 = gen_clt0_i32,
3932 .fniv = gen_clt0_vec,
3933 .opt_opc = vecop_list_cmp,
3934 .vece = MO_32 },
3935 { .fni8 = gen_clt0_i64,
3936 .fniv = gen_clt0_vec,
3937 .opt_opc = vecop_list_cmp,
3938 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3939 .vece = MO_64 },
3942 static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a)
3944 tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0);
3945 tcg_gen_neg_i32(d, d);
3948 static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a)
3950 tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0);
3951 tcg_gen_neg_i64(d, d);
3954 static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3956 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3957 tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero);
3958 tcg_temp_free_vec(zero);
3961 const GVecGen2 cgt0_op[4] = {
3962 { .fno = gen_helper_gvec_cgt0_b,
3963 .fniv = gen_cgt0_vec,
3964 .opt_opc = vecop_list_cmp,
3965 .vece = MO_8 },
3966 { .fno = gen_helper_gvec_cgt0_h,
3967 .fniv = gen_cgt0_vec,
3968 .opt_opc = vecop_list_cmp,
3969 .vece = MO_16 },
3970 { .fni4 = gen_cgt0_i32,
3971 .fniv = gen_cgt0_vec,
3972 .opt_opc = vecop_list_cmp,
3973 .vece = MO_32 },
3974 { .fni8 = gen_cgt0_i64,
3975 .fniv = gen_cgt0_vec,
3976 .opt_opc = vecop_list_cmp,
3977 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3978 .vece = MO_64 },
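/*
 * The ceq0/cle0/cge0/clt0/cgt0 tables above are indexed by element
 * size.  A minimal expansion sketch for one of the "compare against
 * zero" 2-reg-misc ops (register offsets and sizes assumed to have
 * been computed by the caller):
 *
 *     tcg_gen_gvec_2(rd_ofs, rm_ofs, opr_sz, max_sz, &ceq0_op[size]);
 *
 * All the fni4/fni8/fniv hooks produce an all-ones element where the
 * condition holds (setcond result negated), which is the Neon
 * comparison result convention.
 */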
3981 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3983 tcg_gen_vec_sar8i_i64(a, a, shift);
3984 tcg_gen_vec_add8_i64(d, d, a);
3987 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3989 tcg_gen_vec_sar16i_i64(a, a, shift);
3990 tcg_gen_vec_add16_i64(d, d, a);
3993 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3995 tcg_gen_sari_i32(a, a, shift);
3996 tcg_gen_add_i32(d, d, a);
3999 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4001 tcg_gen_sari_i64(a, a, shift);
4002 tcg_gen_add_i64(d, d, a);
4005 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4007 tcg_gen_sari_vec(vece, a, a, sh);
4008 tcg_gen_add_vec(vece, d, d, a);
4011 static const TCGOpcode vecop_list_ssra[] = {
4012 INDEX_op_sari_vec, INDEX_op_add_vec, 0
4015 const GVecGen2i ssra_op[4] = {
4016 { .fni8 = gen_ssra8_i64,
4017 .fniv = gen_ssra_vec,
4018 .load_dest = true,
4019 .opt_opc = vecop_list_ssra,
4020 .vece = MO_8 },
4021 { .fni8 = gen_ssra16_i64,
4022 .fniv = gen_ssra_vec,
4023 .load_dest = true,
4024 .opt_opc = vecop_list_ssra,
4025 .vece = MO_16 },
4026 { .fni4 = gen_ssra32_i32,
4027 .fniv = gen_ssra_vec,
4028 .load_dest = true,
4029 .opt_opc = vecop_list_ssra,
4030 .vece = MO_32 },
4031 { .fni8 = gen_ssra64_i64,
4032 .fniv = gen_ssra_vec,
4033 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4034 .opt_opc = vecop_list_ssra,
4035 .load_dest = true,
4036 .vece = MO_64 },
4039 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4041 tcg_gen_vec_shr8i_i64(a, a, shift);
4042 tcg_gen_vec_add8_i64(d, d, a);
4045 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4047 tcg_gen_vec_shr16i_i64(a, a, shift);
4048 tcg_gen_vec_add16_i64(d, d, a);
4051 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4053 tcg_gen_shri_i32(a, a, shift);
4054 tcg_gen_add_i32(d, d, a);
4057 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4059 tcg_gen_shri_i64(a, a, shift);
4060 tcg_gen_add_i64(d, d, a);
4063 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4065 tcg_gen_shri_vec(vece, a, a, sh);
4066 tcg_gen_add_vec(vece, d, d, a);
4069 static const TCGOpcode vecop_list_usra[] = {
4070 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4073 const GVecGen2i usra_op[4] = {
4074 { .fni8 = gen_usra8_i64,
4075 .fniv = gen_usra_vec,
4076 .load_dest = true,
4077 .opt_opc = vecop_list_usra,
4078 .vece = MO_8, },
4079 { .fni8 = gen_usra16_i64,
4080 .fniv = gen_usra_vec,
4081 .load_dest = true,
4082 .opt_opc = vecop_list_usra,
4083 .vece = MO_16, },
4084 { .fni4 = gen_usra32_i32,
4085 .fniv = gen_usra_vec,
4086 .load_dest = true,
4087 .opt_opc = vecop_list_usra,
4088 .vece = MO_32, },
4089 { .fni8 = gen_usra64_i64,
4090 .fniv = gen_usra_vec,
4091 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4092 .load_dest = true,
4093 .opt_opc = vecop_list_usra,
4094 .vece = MO_64, },
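/*
 * Illustrative scalar models of the accumulating shifts expanded by
 * ssra_op/usra_op above (a documentation sketch only; nothing in the
 * translator calls these, and the ref_* names are invented here).
 * Per element the operation is d += (a >> shift), arithmetic for
 * SSRA and logical for USRA, matching what the .fni4/.fni8 helpers do.
 */
static inline uint32_t ref_ssra32(uint32_t d, uint32_t a, int shift)
{
    return d + (uint32_t)((int32_t)a >> shift);   /* sign bits shift in */
}

static inline uint32_t ref_usra32(uint32_t d, uint32_t a, int shift)
{
    return d + (a >> shift);                      /* zeros shift in */
}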
4097 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4099 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4100 TCGv_i64 t = tcg_temp_new_i64();
4102 tcg_gen_shri_i64(t, a, shift);
4103 tcg_gen_andi_i64(t, t, mask);
4104 tcg_gen_andi_i64(d, d, ~mask);
4105 tcg_gen_or_i64(d, d, t);
4106 tcg_temp_free_i64(t);
4109 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4111 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4112 TCGv_i64 t = tcg_temp_new_i64();
4114 tcg_gen_shri_i64(t, a, shift);
4115 tcg_gen_andi_i64(t, t, mask);
4116 tcg_gen_andi_i64(d, d, ~mask);
4117 tcg_gen_or_i64(d, d, t);
4118 tcg_temp_free_i64(t);
4121 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4123 tcg_gen_shri_i32(a, a, shift);
4124 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4127 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4129 tcg_gen_shri_i64(a, a, shift);
4130 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4133 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4135 if (sh == 0) {
4136 tcg_gen_mov_vec(d, a);
4137 } else {
4138 TCGv_vec t = tcg_temp_new_vec_matching(d);
4139 TCGv_vec m = tcg_temp_new_vec_matching(d);
4141 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4142 tcg_gen_shri_vec(vece, t, a, sh);
4143 tcg_gen_and_vec(vece, d, d, m);
4144 tcg_gen_or_vec(vece, d, d, t);
4146 tcg_temp_free_vec(t);
4147 tcg_temp_free_vec(m);
4151 static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
4153 const GVecGen2i sri_op[4] = {
4154 { .fni8 = gen_shr8_ins_i64,
4155 .fniv = gen_shr_ins_vec,
4156 .load_dest = true,
4157 .opt_opc = vecop_list_sri,
4158 .vece = MO_8 },
4159 { .fni8 = gen_shr16_ins_i64,
4160 .fniv = gen_shr_ins_vec,
4161 .load_dest = true,
4162 .opt_opc = vecop_list_sri,
4163 .vece = MO_16 },
4164 { .fni4 = gen_shr32_ins_i32,
4165 .fniv = gen_shr_ins_vec,
4166 .load_dest = true,
4167 .opt_opc = vecop_list_sri,
4168 .vece = MO_32 },
4169 { .fni8 = gen_shr64_ins_i64,
4170 .fniv = gen_shr_ins_vec,
4171 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4172 .load_dest = true,
4173 .opt_opc = vecop_list_sri,
4174 .vece = MO_64 },
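/*
 * Illustrative single-lane model of the VSRI expansion above (a
 * sketch for documentation only, assuming 0 < shift <= 8; ref_sri8 is
 * not a real helper).  The top 'shift' bits of the destination are
 * preserved and the remaining bits come from the right-shifted
 * source, which is what the dup_const() mask expresses per lane.
 */
static inline uint8_t ref_sri8(uint8_t d, uint8_t a, unsigned shift)
{
    uint8_t mask = 0xff >> shift;                 /* bits taken from the source */
    return (d & ~mask) | ((a >> shift) & mask);
}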
4177 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4179 uint64_t mask = dup_const(MO_8, 0xff << shift);
4180 TCGv_i64 t = tcg_temp_new_i64();
4182 tcg_gen_shli_i64(t, a, shift);
4183 tcg_gen_andi_i64(t, t, mask);
4184 tcg_gen_andi_i64(d, d, ~mask);
4185 tcg_gen_or_i64(d, d, t);
4186 tcg_temp_free_i64(t);
4189 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4191 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4192 TCGv_i64 t = tcg_temp_new_i64();
4194 tcg_gen_shli_i64(t, a, shift);
4195 tcg_gen_andi_i64(t, t, mask);
4196 tcg_gen_andi_i64(d, d, ~mask);
4197 tcg_gen_or_i64(d, d, t);
4198 tcg_temp_free_i64(t);
4201 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4203 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4206 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4208 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4211 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4213 if (sh == 0) {
4214 tcg_gen_mov_vec(d, a);
4215 } else {
4216 TCGv_vec t = tcg_temp_new_vec_matching(d);
4217 TCGv_vec m = tcg_temp_new_vec_matching(d);
4219 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4220 tcg_gen_shli_vec(vece, t, a, sh);
4221 tcg_gen_and_vec(vece, d, d, m);
4222 tcg_gen_or_vec(vece, d, d, t);
4224 tcg_temp_free_vec(t);
4225 tcg_temp_free_vec(m);
4229 static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
4231 const GVecGen2i sli_op[4] = {
4232 { .fni8 = gen_shl8_ins_i64,
4233 .fniv = gen_shl_ins_vec,
4234 .load_dest = true,
4235 .opt_opc = vecop_list_sli,
4236 .vece = MO_8 },
4237 { .fni8 = gen_shl16_ins_i64,
4238 .fniv = gen_shl_ins_vec,
4239 .load_dest = true,
4240 .opt_opc = vecop_list_sli,
4241 .vece = MO_16 },
4242 { .fni4 = gen_shl32_ins_i32,
4243 .fniv = gen_shl_ins_vec,
4244 .load_dest = true,
4245 .opt_opc = vecop_list_sli,
4246 .vece = MO_32 },
4247 { .fni8 = gen_shl64_ins_i64,
4248 .fniv = gen_shl_ins_vec,
4249 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4250 .load_dest = true,
4251 .opt_opc = vecop_list_sli,
4252 .vece = MO_64 },
4255 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4257 gen_helper_neon_mul_u8(a, a, b);
4258 gen_helper_neon_add_u8(d, d, a);
4261 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4263 gen_helper_neon_mul_u8(a, a, b);
4264 gen_helper_neon_sub_u8(d, d, a);
4267 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4269 gen_helper_neon_mul_u16(a, a, b);
4270 gen_helper_neon_add_u16(d, d, a);
4273 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4275 gen_helper_neon_mul_u16(a, a, b);
4276 gen_helper_neon_sub_u16(d, d, a);
4279 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4281 tcg_gen_mul_i32(a, a, b);
4282 tcg_gen_add_i32(d, d, a);
4285 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4287 tcg_gen_mul_i32(a, a, b);
4288 tcg_gen_sub_i32(d, d, a);
4291 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4293 tcg_gen_mul_i64(a, a, b);
4294 tcg_gen_add_i64(d, d, a);
4297 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4299 tcg_gen_mul_i64(a, a, b);
4300 tcg_gen_sub_i64(d, d, a);
4303 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4305 tcg_gen_mul_vec(vece, a, a, b);
4306 tcg_gen_add_vec(vece, d, d, a);
4309 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4311 tcg_gen_mul_vec(vece, a, a, b);
4312 tcg_gen_sub_vec(vece, d, d, a);
4315 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4316 * these tables are shared with AArch64 which does support them.
4319 static const TCGOpcode vecop_list_mla[] = {
4320 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4323 static const TCGOpcode vecop_list_mls[] = {
4324 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4327 const GVecGen3 mla_op[4] = {
4328 { .fni4 = gen_mla8_i32,
4329 .fniv = gen_mla_vec,
4330 .load_dest = true,
4331 .opt_opc = vecop_list_mla,
4332 .vece = MO_8 },
4333 { .fni4 = gen_mla16_i32,
4334 .fniv = gen_mla_vec,
4335 .load_dest = true,
4336 .opt_opc = vecop_list_mla,
4337 .vece = MO_16 },
4338 { .fni4 = gen_mla32_i32,
4339 .fniv = gen_mla_vec,
4340 .load_dest = true,
4341 .opt_opc = vecop_list_mla,
4342 .vece = MO_32 },
4343 { .fni8 = gen_mla64_i64,
4344 .fniv = gen_mla_vec,
4345 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4346 .load_dest = true,
4347 .opt_opc = vecop_list_mla,
4348 .vece = MO_64 },
4351 const GVecGen3 mls_op[4] = {
4352 { .fni4 = gen_mls8_i32,
4353 .fniv = gen_mls_vec,
4354 .load_dest = true,
4355 .opt_opc = vecop_list_mls,
4356 .vece = MO_8 },
4357 { .fni4 = gen_mls16_i32,
4358 .fniv = gen_mls_vec,
4359 .load_dest = true,
4360 .opt_opc = vecop_list_mls,
4361 .vece = MO_16 },
4362 { .fni4 = gen_mls32_i32,
4363 .fniv = gen_mls_vec,
4364 .load_dest = true,
4365 .opt_opc = vecop_list_mls,
4366 .vece = MO_32 },
4367 { .fni8 = gen_mls64_i64,
4368 .fniv = gen_mls_vec,
4369 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4370 .load_dest = true,
4371 .opt_opc = vecop_list_mls,
4372 .vece = MO_64 },
4376 /* CMTST : test is "if ((X & Y) != 0)". */
4376 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4378 tcg_gen_and_i32(d, a, b);
4379 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4380 tcg_gen_neg_i32(d, d);
4383 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4385 tcg_gen_and_i64(d, a, b);
4386 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4387 tcg_gen_neg_i64(d, d);
4390 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4392 tcg_gen_and_vec(vece, d, a, b);
4393 tcg_gen_dupi_vec(vece, a, 0);
4394 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4397 static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
4399 const GVecGen3 cmtst_op[4] = {
4400 { .fni4 = gen_helper_neon_tst_u8,
4401 .fniv = gen_cmtst_vec,
4402 .opt_opc = vecop_list_cmtst,
4403 .vece = MO_8 },
4404 { .fni4 = gen_helper_neon_tst_u16,
4405 .fniv = gen_cmtst_vec,
4406 .opt_opc = vecop_list_cmtst,
4407 .vece = MO_16 },
4408 { .fni4 = gen_cmtst_i32,
4409 .fniv = gen_cmtst_vec,
4410 .opt_opc = vecop_list_cmtst,
4411 .vece = MO_32 },
4412 { .fni8 = gen_cmtst_i64,
4413 .fniv = gen_cmtst_vec,
4414 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4415 .opt_opc = vecop_list_cmtst,
4416 .vece = MO_64 },
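/*
 * Illustrative scalar model of CMTST (sketch only; ref_cmtst32 is not
 * used anywhere): the result lane is all ones when the operands have
 * any bit in common, otherwise all zeros.
 */
static inline uint32_t ref_cmtst32(uint32_t a, uint32_t b)
{
    return (a & b) != 0 ? ~0u : 0u;
}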
4419 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4421 TCGv_i32 lval = tcg_temp_new_i32();
4422 TCGv_i32 rval = tcg_temp_new_i32();
4423 TCGv_i32 lsh = tcg_temp_new_i32();
4424 TCGv_i32 rsh = tcg_temp_new_i32();
4425 TCGv_i32 zero = tcg_const_i32(0);
4426 TCGv_i32 max = tcg_const_i32(32);
4429 * Rely on the TCG guarantee that out of range shifts produce
4430 * unspecified results, not undefined behaviour (i.e. no trap).
4431 * Discard out-of-range results after the fact.
4433 tcg_gen_ext8s_i32(lsh, shift);
4434 tcg_gen_neg_i32(rsh, lsh);
4435 tcg_gen_shl_i32(lval, src, lsh);
4436 tcg_gen_shr_i32(rval, src, rsh);
4437 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4438 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4440 tcg_temp_free_i32(lval);
4441 tcg_temp_free_i32(rval);
4442 tcg_temp_free_i32(lsh);
4443 tcg_temp_free_i32(rsh);
4444 tcg_temp_free_i32(zero);
4445 tcg_temp_free_i32(max);
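/*
 * Illustrative scalar model of the USHL semantics implemented by
 * gen_ushl_i32() above (documentation sketch only; ref_ushl32 is an
 * invented name).  The count is the signed low byte of the shift
 * operand: positive counts shift left, negative counts shift right,
 * and a magnitude of 32 or more in either direction yields zero.
 */
static inline uint32_t ref_ushl32(uint32_t src, int8_t shift)
{
    if (shift <= -32 || shift >= 32) {
        return 0;
    }
    return shift >= 0 ? src << shift : src >> -shift;
}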
4448 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4450 TCGv_i64 lval = tcg_temp_new_i64();
4451 TCGv_i64 rval = tcg_temp_new_i64();
4452 TCGv_i64 lsh = tcg_temp_new_i64();
4453 TCGv_i64 rsh = tcg_temp_new_i64();
4454 TCGv_i64 zero = tcg_const_i64(0);
4455 TCGv_i64 max = tcg_const_i64(64);
4458 * Rely on the TCG guarantee that out of range shifts produce
4459 * unspecified results, not undefined behaviour (i.e. no trap).
4460 * Discard out-of-range results after the fact.
4462 tcg_gen_ext8s_i64(lsh, shift);
4463 tcg_gen_neg_i64(rsh, lsh);
4464 tcg_gen_shl_i64(lval, src, lsh);
4465 tcg_gen_shr_i64(rval, src, rsh);
4466 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4467 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4469 tcg_temp_free_i64(lval);
4470 tcg_temp_free_i64(rval);
4471 tcg_temp_free_i64(lsh);
4472 tcg_temp_free_i64(rsh);
4473 tcg_temp_free_i64(zero);
4474 tcg_temp_free_i64(max);
4477 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4478 TCGv_vec src, TCGv_vec shift)
4480 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4481 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4482 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4483 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4484 TCGv_vec msk, max;
4486 tcg_gen_neg_vec(vece, rsh, shift);
4487 if (vece == MO_8) {
4488 tcg_gen_mov_vec(lsh, shift);
4489 } else {
4490 msk = tcg_temp_new_vec_matching(dst);
4491 tcg_gen_dupi_vec(vece, msk, 0xff);
4492 tcg_gen_and_vec(vece, lsh, shift, msk);
4493 tcg_gen_and_vec(vece, rsh, rsh, msk);
4494 tcg_temp_free_vec(msk);
4498 * Rely on the TCG guarantee that out of range shifts produce
4499 * unspecified results, not undefined behaviour (i.e. no trap).
4500 * Discard out-of-range results after the fact.
4502 tcg_gen_shlv_vec(vece, lval, src, lsh);
4503 tcg_gen_shrv_vec(vece, rval, src, rsh);
4505 max = tcg_temp_new_vec_matching(dst);
4506 tcg_gen_dupi_vec(vece, max, 8 << vece);
4509 * The choice of LT (signed) and GEU (unsigned) is biased toward
4510 * the instructions of the x86_64 host. For MO_8, the whole byte
4511 * is significant so we must use an unsigned compare; otherwise we
4512 * have already masked to a byte and so a signed compare works.
4513 * Other tcg hosts have a full set of comparisons and do not care.
4515 if (vece == MO_8) {
4516 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4517 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4518 tcg_gen_andc_vec(vece, lval, lval, lsh);
4519 tcg_gen_andc_vec(vece, rval, rval, rsh);
4520 } else {
4521 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4522 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4523 tcg_gen_and_vec(vece, lval, lval, lsh);
4524 tcg_gen_and_vec(vece, rval, rval, rsh);
4526 tcg_gen_or_vec(vece, dst, lval, rval);
4528 tcg_temp_free_vec(max);
4529 tcg_temp_free_vec(lval);
4530 tcg_temp_free_vec(rval);
4531 tcg_temp_free_vec(lsh);
4532 tcg_temp_free_vec(rsh);
4535 static const TCGOpcode ushl_list[] = {
4536 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4537 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4540 const GVecGen3 ushl_op[4] = {
4541 { .fniv = gen_ushl_vec,
4542 .fno = gen_helper_gvec_ushl_b,
4543 .opt_opc = ushl_list,
4544 .vece = MO_8 },
4545 { .fniv = gen_ushl_vec,
4546 .fno = gen_helper_gvec_ushl_h,
4547 .opt_opc = ushl_list,
4548 .vece = MO_16 },
4549 { .fni4 = gen_ushl_i32,
4550 .fniv = gen_ushl_vec,
4551 .opt_opc = ushl_list,
4552 .vece = MO_32 },
4553 { .fni8 = gen_ushl_i64,
4554 .fniv = gen_ushl_vec,
4555 .opt_opc = ushl_list,
4556 .vece = MO_64 },
4559 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4561 TCGv_i32 lval = tcg_temp_new_i32();
4562 TCGv_i32 rval = tcg_temp_new_i32();
4563 TCGv_i32 lsh = tcg_temp_new_i32();
4564 TCGv_i32 rsh = tcg_temp_new_i32();
4565 TCGv_i32 zero = tcg_const_i32(0);
4566 TCGv_i32 max = tcg_const_i32(31);
4569 * Rely on the TCG guarantee that out of range shifts produce
4570 * unspecified results, not undefined behaviour (i.e. no trap).
4571 * Discard out-of-range results after the fact.
4573 tcg_gen_ext8s_i32(lsh, shift);
4574 tcg_gen_neg_i32(rsh, lsh);
4575 tcg_gen_shl_i32(lval, src, lsh);
4576 tcg_gen_umin_i32(rsh, rsh, max);
4577 tcg_gen_sar_i32(rval, src, rsh);
4578 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4579 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4581 tcg_temp_free_i32(lval);
4582 tcg_temp_free_i32(rval);
4583 tcg_temp_free_i32(lsh);
4584 tcg_temp_free_i32(rsh);
4585 tcg_temp_free_i32(zero);
4586 tcg_temp_free_i32(max);
4589 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4591 TCGv_i64 lval = tcg_temp_new_i64();
4592 TCGv_i64 rval = tcg_temp_new_i64();
4593 TCGv_i64 lsh = tcg_temp_new_i64();
4594 TCGv_i64 rsh = tcg_temp_new_i64();
4595 TCGv_i64 zero = tcg_const_i64(0);
4596 TCGv_i64 max = tcg_const_i64(63);
4599 * Rely on the TCG guarantee that out of range shifts produce
4600 * unspecified results, not undefined behaviour (i.e. no trap).
4601 * Discard out-of-range results after the fact.
4603 tcg_gen_ext8s_i64(lsh, shift);
4604 tcg_gen_neg_i64(rsh, lsh);
4605 tcg_gen_shl_i64(lval, src, lsh);
4606 tcg_gen_umin_i64(rsh, rsh, max);
4607 tcg_gen_sar_i64(rval, src, rsh);
4608 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4609 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4611 tcg_temp_free_i64(lval);
4612 tcg_temp_free_i64(rval);
4613 tcg_temp_free_i64(lsh);
4614 tcg_temp_free_i64(rsh);
4615 tcg_temp_free_i64(zero);
4616 tcg_temp_free_i64(max);
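/*
 * Illustrative scalar model of the SSHL semantics implemented by
 * gen_sshl_i32()/gen_sshl_i64() above (sketch only; ref_sshl32 is an
 * invented name, and it assumes the host's >> on signed values is
 * arithmetic, as GCC and Clang provide).  Unlike USHL, an
 * out-of-range negative count yields all sign bits, which is why the
 * generated code clamps the right-shift amount instead of discarding
 * the result.
 */
static inline int32_t ref_sshl32(int32_t src, int8_t shift)
{
    if (shift >= 32) {
        return 0;
    }
    if (shift <= -32) {
        return src < 0 ? -1 : 0;                  /* all sign bits */
    }
    if (shift >= 0) {
        return (int32_t)((uint32_t)src << shift);
    }
    return src >> -shift;
}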
4619 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4620 TCGv_vec src, TCGv_vec shift)
4622 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4623 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4624 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4625 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4626 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4629 * Rely on the TCG guarantee that out of range shifts produce
4630 * unspecified results, not undefined behaviour (i.e. no trap).
4631 * Discard out-of-range results after the fact.
4633 tcg_gen_neg_vec(vece, rsh, shift);
4634 if (vece == MO_8) {
4635 tcg_gen_mov_vec(lsh, shift);
4636 } else {
4637 tcg_gen_dupi_vec(vece, tmp, 0xff);
4638 tcg_gen_and_vec(vece, lsh, shift, tmp);
4639 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4642 /* Bound rsh so that an out-of-range right shift still produces all sign bits. */
4643 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4644 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4645 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4647 tcg_gen_shlv_vec(vece, lval, src, lsh);
4648 tcg_gen_sarv_vec(vece, rval, src, rsh);
4650 /* Select in-bound left shift. */
4651 tcg_gen_andc_vec(vece, lval, lval, tmp);
4653 /* Select between left and right shift. */
4654 if (vece == MO_8) {
4655 tcg_gen_dupi_vec(vece, tmp, 0);
4656 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4657 } else {
4658 tcg_gen_dupi_vec(vece, tmp, 0x80);
4659 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4662 tcg_temp_free_vec(lval);
4663 tcg_temp_free_vec(rval);
4664 tcg_temp_free_vec(lsh);
4665 tcg_temp_free_vec(rsh);
4666 tcg_temp_free_vec(tmp);
4669 static const TCGOpcode sshl_list[] = {
4670 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4671 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4674 const GVecGen3 sshl_op[4] = {
4675 { .fniv = gen_sshl_vec,
4676 .fno = gen_helper_gvec_sshl_b,
4677 .opt_opc = sshl_list,
4678 .vece = MO_8 },
4679 { .fniv = gen_sshl_vec,
4680 .fno = gen_helper_gvec_sshl_h,
4681 .opt_opc = sshl_list,
4682 .vece = MO_16 },
4683 { .fni4 = gen_sshl_i32,
4684 .fniv = gen_sshl_vec,
4685 .opt_opc = sshl_list,
4686 .vece = MO_32 },
4687 { .fni8 = gen_sshl_i64,
4688 .fniv = gen_sshl_vec,
4689 .opt_opc = sshl_list,
4690 .vece = MO_64 },
4693 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4694 TCGv_vec a, TCGv_vec b)
4696 TCGv_vec x = tcg_temp_new_vec_matching(t);
4697 tcg_gen_add_vec(vece, x, a, b);
4698 tcg_gen_usadd_vec(vece, t, a, b);
4699 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4700 tcg_gen_or_vec(vece, sat, sat, x);
4701 tcg_temp_free_vec(x);
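/*
 * Illustrative scalar model of the saturation tracking done by
 * gen_uqadd_vec() and the related expanders below (sketch only;
 * ref_uqadd8 is an invented name).  The wrapped and the saturated
 * sums are computed separately and any difference is ORed into the
 * sticky QC flag, mirroring the cmp/or on the 'sat' vector above.
 */
static inline uint8_t ref_uqadd8(uint8_t a, uint8_t b, bool *qc)
{
    uint8_t wrapped = a + b;
    uint8_t saturated = wrapped < a ? 0xff : wrapped;   /* usadd result */
    *qc |= (wrapped != saturated);
    return saturated;
}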
4704 static const TCGOpcode vecop_list_uqadd[] = {
4705 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4708 const GVecGen4 uqadd_op[4] = {
4709 { .fniv = gen_uqadd_vec,
4710 .fno = gen_helper_gvec_uqadd_b,
4711 .write_aofs = true,
4712 .opt_opc = vecop_list_uqadd,
4713 .vece = MO_8 },
4714 { .fniv = gen_uqadd_vec,
4715 .fno = gen_helper_gvec_uqadd_h,
4716 .write_aofs = true,
4717 .opt_opc = vecop_list_uqadd,
4718 .vece = MO_16 },
4719 { .fniv = gen_uqadd_vec,
4720 .fno = gen_helper_gvec_uqadd_s,
4721 .write_aofs = true,
4722 .opt_opc = vecop_list_uqadd,
4723 .vece = MO_32 },
4724 { .fniv = gen_uqadd_vec,
4725 .fno = gen_helper_gvec_uqadd_d,
4726 .write_aofs = true,
4727 .opt_opc = vecop_list_uqadd,
4728 .vece = MO_64 },
4731 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4732 TCGv_vec a, TCGv_vec b)
4734 TCGv_vec x = tcg_temp_new_vec_matching(t);
4735 tcg_gen_add_vec(vece, x, a, b);
4736 tcg_gen_ssadd_vec(vece, t, a, b);
4737 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4738 tcg_gen_or_vec(vece, sat, sat, x);
4739 tcg_temp_free_vec(x);
4742 static const TCGOpcode vecop_list_sqadd[] = {
4743 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4746 const GVecGen4 sqadd_op[4] = {
4747 { .fniv = gen_sqadd_vec,
4748 .fno = gen_helper_gvec_sqadd_b,
4749 .opt_opc = vecop_list_sqadd,
4750 .write_aofs = true,
4751 .vece = MO_8 },
4752 { .fniv = gen_sqadd_vec,
4753 .fno = gen_helper_gvec_sqadd_h,
4754 .opt_opc = vecop_list_sqadd,
4755 .write_aofs = true,
4756 .vece = MO_16 },
4757 { .fniv = gen_sqadd_vec,
4758 .fno = gen_helper_gvec_sqadd_s,
4759 .opt_opc = vecop_list_sqadd,
4760 .write_aofs = true,
4761 .vece = MO_32 },
4762 { .fniv = gen_sqadd_vec,
4763 .fno = gen_helper_gvec_sqadd_d,
4764 .opt_opc = vecop_list_sqadd,
4765 .write_aofs = true,
4766 .vece = MO_64 },
4769 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4770 TCGv_vec a, TCGv_vec b)
4772 TCGv_vec x = tcg_temp_new_vec_matching(t);
4773 tcg_gen_sub_vec(vece, x, a, b);
4774 tcg_gen_ussub_vec(vece, t, a, b);
4775 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4776 tcg_gen_or_vec(vece, sat, sat, x);
4777 tcg_temp_free_vec(x);
4780 static const TCGOpcode vecop_list_uqsub[] = {
4781 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4784 const GVecGen4 uqsub_op[4] = {
4785 { .fniv = gen_uqsub_vec,
4786 .fno = gen_helper_gvec_uqsub_b,
4787 .opt_opc = vecop_list_uqsub,
4788 .write_aofs = true,
4789 .vece = MO_8 },
4790 { .fniv = gen_uqsub_vec,
4791 .fno = gen_helper_gvec_uqsub_h,
4792 .opt_opc = vecop_list_uqsub,
4793 .write_aofs = true,
4794 .vece = MO_16 },
4795 { .fniv = gen_uqsub_vec,
4796 .fno = gen_helper_gvec_uqsub_s,
4797 .opt_opc = vecop_list_uqsub,
4798 .write_aofs = true,
4799 .vece = MO_32 },
4800 { .fniv = gen_uqsub_vec,
4801 .fno = gen_helper_gvec_uqsub_d,
4802 .opt_opc = vecop_list_uqsub,
4803 .write_aofs = true,
4804 .vece = MO_64 },
4807 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4808 TCGv_vec a, TCGv_vec b)
4810 TCGv_vec x = tcg_temp_new_vec_matching(t);
4811 tcg_gen_sub_vec(vece, x, a, b);
4812 tcg_gen_sssub_vec(vece, t, a, b);
4813 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4814 tcg_gen_or_vec(vece, sat, sat, x);
4815 tcg_temp_free_vec(x);
4818 static const TCGOpcode vecop_list_sqsub[] = {
4819 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4822 const GVecGen4 sqsub_op[4] = {
4823 { .fniv = gen_sqsub_vec,
4824 .fno = gen_helper_gvec_sqsub_b,
4825 .opt_opc = vecop_list_sqsub,
4826 .write_aofs = true,
4827 .vece = MO_8 },
4828 { .fniv = gen_sqsub_vec,
4829 .fno = gen_helper_gvec_sqsub_h,
4830 .opt_opc = vecop_list_sqsub,
4831 .write_aofs = true,
4832 .vece = MO_16 },
4833 { .fniv = gen_sqsub_vec,
4834 .fno = gen_helper_gvec_sqsub_s,
4835 .opt_opc = vecop_list_sqsub,
4836 .write_aofs = true,
4837 .vece = MO_32 },
4838 { .fniv = gen_sqsub_vec,
4839 .fno = gen_helper_gvec_sqsub_d,
4840 .opt_opc = vecop_list_sqsub,
4841 .write_aofs = true,
4842 .vece = MO_64 },
4845 /* Translate a NEON data processing instruction. Return nonzero if the
4846 instruction is invalid.
4847 We process data in a mixture of 32-bit and 64-bit chunks.
4848 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
4850 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
4852 int op;
4853 int q;
4854 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
4855 int size;
4856 int shift;
4857 int pass;
4858 int count;
4859 int pairwise;
4860 int u;
4861 int vec_size;
4862 uint32_t imm;
4863 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
4864 TCGv_ptr ptr1, ptr2, ptr3;
4865 TCGv_i64 tmp64;
4867 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
4868 return 1;
4871 /* FIXME: this access check should not take precedence over UNDEF
4872 * for invalid encodings; we will generate incorrect syndrome information
4873 * for attempts to execute invalid vfp/neon encodings with FP disabled.
4875 if (s->fp_excp_el) {
4876 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
4877 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
4878 return 0;
4881 if (!s->vfp_enabled)
4882 return 1;
4883 q = (insn & (1 << 6)) != 0;
4884 u = (insn >> 24) & 1;
4885 VFP_DREG_D(rd, insn);
4886 VFP_DREG_N(rn, insn);
4887 VFP_DREG_M(rm, insn);
4888 size = (insn >> 20) & 3;
4889 vec_size = q ? 16 : 8;
4890 rd_ofs = neon_reg_offset(rd, 0);
4891 rn_ofs = neon_reg_offset(rn, 0);
4892 rm_ofs = neon_reg_offset(rm, 0);
4894 if ((insn & (1 << 23)) == 0) {
4895 /* Three register same length. */
4896 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
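/*
 * For this three-register-same-length group 'op' is the 5-bit value
 * insn[11:8]:insn[4]: bits [11:8] land in op[4:1] and bit 4 in op[0].
 */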
4897 /* Catch invalid op and bad size combinations: UNDEF */
4898 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4899 return 1;
4901 /* All insns of this form UNDEF for either this condition or the
4902 * superset of cases "Q==1"; we catch the latter later.
4904 if (q && ((rd | rn | rm) & 1)) {
4905 return 1;
4907 switch (op) {
4908 case NEON_3R_SHA:
4909 /* The SHA-1/SHA-256 3-register instructions require special
4910 * treatment here, as their size field is overloaded as an
4911 * op type selector, and they all consume their input in a
4912 * single pass.
4914 if (!q) {
4915 return 1;
4917 if (!u) { /* SHA-1 */
4918 if (!dc_isar_feature(aa32_sha1, s)) {
4919 return 1;
4921 ptr1 = vfp_reg_ptr(true, rd);
4922 ptr2 = vfp_reg_ptr(true, rn);
4923 ptr3 = vfp_reg_ptr(true, rm);
4924 tmp4 = tcg_const_i32(size);
4925 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
4926 tcg_temp_free_i32(tmp4);
4927 } else { /* SHA-256 */
4928 if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
4929 return 1;
4931 ptr1 = vfp_reg_ptr(true, rd);
4932 ptr2 = vfp_reg_ptr(true, rn);
4933 ptr3 = vfp_reg_ptr(true, rm);
4934 switch (size) {
4935 case 0:
4936 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
4937 break;
4938 case 1:
4939 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
4940 break;
4941 case 2:
4942 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
4943 break;
4946 tcg_temp_free_ptr(ptr1);
4947 tcg_temp_free_ptr(ptr2);
4948 tcg_temp_free_ptr(ptr3);
4949 return 0;
4951 case NEON_3R_VPADD_VQRDMLAH:
4952 if (!u) {
4953 break; /* VPADD */
4955 /* VQRDMLAH */
4956 switch (size) {
4957 case 1:
4958 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
4959 q, rd, rn, rm);
4960 case 2:
4961 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
4962 q, rd, rn, rm);
4964 return 1;
4966 case NEON_3R_VFM_VQRDMLSH:
4967 if (!u) {
4968 /* VFM, VFMS */
4969 if (size == 1) {
4970 return 1;
4972 break;
4974 /* VQRDMLSH */
4975 switch (size) {
4976 case 1:
4977 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
4978 q, rd, rn, rm);
4979 case 2:
4980 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
4981 q, rd, rn, rm);
4983 return 1;
4985 case NEON_3R_LOGIC: /* Logic ops. */
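/*
 * VBSL, VBIT and VBIF below all map onto the same bitwise select,
 * d = (b & a) | (c & ~a) with operand 'a' acting as the selector;
 * only the operand order differs: VBSL selects with the old
 * destination, while VBIT and VBIF select with Vm and insert either
 * Vn or the old Vd.
 */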
4986 switch ((u << 2) | size) {
4987 case 0: /* VAND */
4988 tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
4989 vec_size, vec_size);
4990 break;
4991 case 1: /* VBIC */
4992 tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
4993 vec_size, vec_size);
4994 break;
4995 case 2: /* VORR */
4996 tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
4997 vec_size, vec_size);
4998 break;
4999 case 3: /* VORN */
5000 tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
5001 vec_size, vec_size);
5002 break;
5003 case 4: /* VEOR */
5004 tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
5005 vec_size, vec_size);
5006 break;
5007 case 5: /* VBSL */
5008 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
5009 vec_size, vec_size);
5010 break;
5011 case 6: /* VBIT */
5012 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
5013 vec_size, vec_size);
5014 break;
5015 case 7: /* VBIF */
5016 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
5017 vec_size, vec_size);
5018 break;
5020 return 0;
5022 case NEON_3R_VADD_VSUB:
5023 if (u) {
5024 tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
5025 vec_size, vec_size);
5026 } else {
5027 tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
5028 vec_size, vec_size);
5030 return 0;
5032 case NEON_3R_VQADD:
5033 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5034 rn_ofs, rm_ofs, vec_size, vec_size,
5035 (u ? uqadd_op : sqadd_op) + size);
5036 return 0;
5038 case NEON_3R_VQSUB:
5039 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5040 rn_ofs, rm_ofs, vec_size, vec_size,
5041 (u ? uqsub_op : sqsub_op) + size);
5042 return 0;
5044 case NEON_3R_VMUL: /* VMUL */
5045 if (u) {
5046 /* Polynomial case allows only P8. */
5047 if (size != 0) {
5048 return 1;
5050 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
5051 0, gen_helper_gvec_pmul_b);
5052 } else {
5053 tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
5054 vec_size, vec_size);
5056 return 0;
5058 case NEON_3R_VML: /* VMLA, VMLS */
5059 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
5060 u ? &mls_op[size] : &mla_op[size]);
5061 return 0;
5063 case NEON_3R_VTST_VCEQ:
5064 if (u) { /* VCEQ */
5065 tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
5066 vec_size, vec_size);
5067 } else { /* VTST */
5068 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
5069 vec_size, vec_size, &cmtst_op[size]);
5071 return 0;
5073 case NEON_3R_VCGT:
5074 tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
5075 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
5076 return 0;
5078 case NEON_3R_VCGE:
5079 tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
5080 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
5081 return 0;
5083 case NEON_3R_VMAX:
5084 if (u) {
5085 tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
5086 vec_size, vec_size);
5087 } else {
5088 tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
5089 vec_size, vec_size);
5091 return 0;
5092 case NEON_3R_VMIN:
5093 if (u) {
5094 tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
5095 vec_size, vec_size);
5096 } else {
5097 tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
5098 vec_size, vec_size);
5100 return 0;
5102 case NEON_3R_VSHL:
5103 /* Note the operation is vshl vd,vm,vn */
5104 tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
5105 u ? &ushl_op[size] : &sshl_op[size]);
5106 return 0;
5109 if (size == 3) {
5110 /* 64-bit element instructions. */
5111 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5112 neon_load_reg64(cpu_V0, rn + pass);
5113 neon_load_reg64(cpu_V1, rm + pass);
5114 switch (op) {
5115 case NEON_3R_VQSHL:
5116 if (u) {
5117 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5118 cpu_V1, cpu_V0);
5119 } else {
5120 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5121 cpu_V1, cpu_V0);
5123 break;
5124 case NEON_3R_VRSHL:
5125 if (u) {
5126 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5127 } else {
5128 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5130 break;
5131 case NEON_3R_VQRSHL:
5132 if (u) {
5133 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5134 cpu_V1, cpu_V0);
5135 } else {
5136 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5137 cpu_V1, cpu_V0);
5139 break;
5140 default:
5141 abort();
5143 neon_store_reg64(cpu_V0, rd + pass);
5145 return 0;
5147 pairwise = 0;
5148 switch (op) {
5149 case NEON_3R_VQSHL:
5150 case NEON_3R_VRSHL:
5151 case NEON_3R_VQRSHL:
5153 int rtmp;
5154 /* Shift instruction operands are reversed. */
5155 rtmp = rn;
5156 rn = rm;
5157 rm = rtmp;
5159 break;
5160 case NEON_3R_VPADD_VQRDMLAH:
5161 case NEON_3R_VPMAX:
5162 case NEON_3R_VPMIN:
5163 pairwise = 1;
5164 break;
5165 case NEON_3R_FLOAT_ARITH:
5166 pairwise = (u && size < 2); /* if VPADD (float) */
5167 break;
5168 case NEON_3R_FLOAT_MINMAX:
5169 pairwise = u; /* if VPMIN/VPMAX (float) */
5170 break;
5171 case NEON_3R_FLOAT_CMP:
5172 if (!u && size) {
5173 /* no encoding for U=0 C=1x */
5174 return 1;
5176 break;
5177 case NEON_3R_FLOAT_ACMP:
5178 if (!u) {
5179 return 1;
5181 break;
5182 case NEON_3R_FLOAT_MISC:
5183 /* VMAXNM/VMINNM in ARMv8 */
5184 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5185 return 1;
5187 break;
5188 case NEON_3R_VFM_VQRDMLSH:
5189 if (!dc_isar_feature(aa32_simdfmac, s)) {
5190 return 1;
5192 break;
5193 default:
5194 break;
5197 if (pairwise && q) {
5198 /* All the pairwise insns UNDEF if Q is set */
5199 return 1;
5202 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5204 if (pairwise) {
5205 /* Pairwise. */
5206 if (pass < 1) {
5207 tmp = neon_load_reg(rn, 0);
5208 tmp2 = neon_load_reg(rn, 1);
5209 } else {
5210 tmp = neon_load_reg(rm, 0);
5211 tmp2 = neon_load_reg(rm, 1);
5213 } else {
5214 /* Elementwise. */
5215 tmp = neon_load_reg(rn, pass);
5216 tmp2 = neon_load_reg(rm, pass);
5218 switch (op) {
5219 case NEON_3R_VHADD:
5220 GEN_NEON_INTEGER_OP(hadd);
5221 break;
5222 case NEON_3R_VRHADD:
5223 GEN_NEON_INTEGER_OP(rhadd);
5224 break;
5225 case NEON_3R_VHSUB:
5226 GEN_NEON_INTEGER_OP(hsub);
5227 break;
5228 case NEON_3R_VQSHL:
5229 GEN_NEON_INTEGER_OP_ENV(qshl);
5230 break;
5231 case NEON_3R_VRSHL:
5232 GEN_NEON_INTEGER_OP(rshl);
5233 break;
5234 case NEON_3R_VQRSHL:
5235 GEN_NEON_INTEGER_OP_ENV(qrshl);
5236 break;
5237 case NEON_3R_VABD:
5238 GEN_NEON_INTEGER_OP(abd);
5239 break;
5240 case NEON_3R_VABA:
5241 GEN_NEON_INTEGER_OP(abd);
5242 tcg_temp_free_i32(tmp2);
5243 tmp2 = neon_load_reg(rd, pass);
5244 gen_neon_add(size, tmp, tmp2);
5245 break;
5246 case NEON_3R_VPMAX:
5247 GEN_NEON_INTEGER_OP(pmax);
5248 break;
5249 case NEON_3R_VPMIN:
5250 GEN_NEON_INTEGER_OP(pmin);
5251 break;
5252 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
5253 if (!u) { /* VQDMULH */
5254 switch (size) {
5255 case 1:
5256 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5257 break;
5258 case 2:
5259 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5260 break;
5261 default: abort();
5263 } else { /* VQRDMULH */
5264 switch (size) {
5265 case 1:
5266 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5267 break;
5268 case 2:
5269 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5270 break;
5271 default: abort();
5274 break;
5275 case NEON_3R_VPADD_VQRDMLAH:
5276 switch (size) {
5277 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5278 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5279 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5280 default: abort();
5282 break;
5283 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5285 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5286 switch ((u << 2) | size) {
5287 case 0: /* VADD */
5288 case 4: /* VPADD */
5289 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5290 break;
5291 case 2: /* VSUB */
5292 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5293 break;
5294 case 6: /* VABD */
5295 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5296 break;
5297 default:
5298 abort();
5300 tcg_temp_free_ptr(fpstatus);
5301 break;
5303 case NEON_3R_FLOAT_MULTIPLY:
5305 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5306 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5307 if (!u) {
5308 tcg_temp_free_i32(tmp2);
5309 tmp2 = neon_load_reg(rd, pass);
5310 if (size == 0) {
5311 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5312 } else {
5313 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5316 tcg_temp_free_ptr(fpstatus);
5317 break;
5319 case NEON_3R_FLOAT_CMP:
5321 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5322 if (!u) {
5323 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5324 } else {
5325 if (size == 0) {
5326 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5327 } else {
5328 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5331 tcg_temp_free_ptr(fpstatus);
5332 break;
5334 case NEON_3R_FLOAT_ACMP:
5336 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5337 if (size == 0) {
5338 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5339 } else {
5340 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5342 tcg_temp_free_ptr(fpstatus);
5343 break;
5345 case NEON_3R_FLOAT_MINMAX:
5347 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5348 if (size == 0) {
5349 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5350 } else {
5351 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5353 tcg_temp_free_ptr(fpstatus);
5354 break;
5356 case NEON_3R_FLOAT_MISC:
5357 if (u) {
5358 /* VMAXNM/VMINNM */
5359 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5360 if (size == 0) {
5361 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5362 } else {
5363 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5365 tcg_temp_free_ptr(fpstatus);
5366 } else {
5367 if (size == 0) {
5368 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5369 } else {
5370 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5373 break;
5374 case NEON_3R_VFM_VQRDMLSH:
5376 /* VFMA, VFMS: fused multiply-add */
5377 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5378 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5379 if (size) {
5380 /* VFMS */
5381 gen_helper_vfp_negs(tmp, tmp);
5383 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5384 tcg_temp_free_i32(tmp3);
5385 tcg_temp_free_ptr(fpstatus);
5386 break;
5388 default:
5389 abort();
5391 tcg_temp_free_i32(tmp2);
5393 /* Save the result. For elementwise operations we can put it
5394 straight into the destination register. For pairwise operations
5395 we have to be careful to avoid clobbering the source operands. */
5396 if (pairwise && rd == rm) {
5397 neon_store_scratch(pass, tmp);
5398 } else {
5399 neon_store_reg(rd, pass, tmp);
5402 } /* for pass */
5403 if (pairwise && rd == rm) {
5404 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5405 tmp = neon_load_scratch(pass);
5406 neon_store_reg(rd, pass, tmp);
5409 /* End of 3 register same size operations. */
5410 } else if (insn & (1 << 4)) {
5411 if ((insn & 0x00380080) != 0) {
5412 /* Two registers and shift. */
5413 op = (insn >> 8) & 0xf;
5414 if (insn & (1 << 7)) {
5415 /* 64-bit shift. */
5416 if (op > 7) {
5417 return 1;
5419 size = 3;
5420 } else {
5421 size = 2;
5422 while ((insn & (1 << (size + 19))) == 0)
5423 size--;
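/*
 * The loop above derives the element size from the position of the
 * most significant set bit of imm6[5:3] (insn bits [21:19]):
 * 1xx selects 32-bit, 01x 16-bit and 001 8-bit elements.
 */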
5425 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5426 if (op < 8) {
5427 /* Shift by immediate:
5428 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5429 if (q && ((rd | rm) & 1)) {
5430 return 1;
5432 if (!u && (op == 4 || op == 6)) {
5433 return 1;
5435 /* Right shifts are encoded as N - shift, where N is the
5436 element size in bits. */
5437 if (op <= 4) {
5438 shift = shift - (1 << (size + 3));
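/*
 * Worked example: VSHR.S8 #3 is encoded with imm6 = 16 - 3 = 0b001101,
 * so size decodes to 0 and the masked field gives shift = 5; the
 * subtraction above turns that into -3, which the VSHR case below
 * negates back into the real shift count.
 */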
5441 switch (op) {
5442 case 0: /* VSHR */
5443 /* Right shift comes here negative. */
5444 shift = -shift;
5445 /* Shifts larger than the element size are architecturally
5446 * valid. An unsigned shift results in all zeros; a signed
5447 * shift results in all sign bits.
5449 if (!u) {
5450 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5451 MIN(shift, (8 << size) - 1),
5452 vec_size, vec_size);
5453 } else if (shift >= 8 << size) {
5454 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5455 } else {
5456 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5457 vec_size, vec_size);
5459 return 0;
5461 case 1: /* VSRA */
5462 /* Right shift comes here negative. */
5463 shift = -shift;
5464 /* Shifts larger than the element size are architecturally
5465 * valid. An unsigned shift results in all zeros; a signed
5466 * shift results in all sign bits.
5468 if (!u) {
5469 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5470 MIN(shift, (8 << size) - 1),
5471 &ssra_op[size]);
5472 } else if (shift >= 8 << size) {
5473 /* rd += 0 */
5474 } else {
5475 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5476 shift, &usra_op[size]);
5478 return 0;
5480 case 4: /* VSRI */
5481 if (!u) {
5482 return 1;
5484 /* Right shift comes here negative. */
5485 shift = -shift;
5486 /* Shift out of range leaves destination unchanged. */
5487 if (shift < 8 << size) {
5488 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5489 shift, &sri_op[size]);
5491 return 0;
5493 case 5: /* VSHL, VSLI */
5494 if (u) { /* VSLI */
5495 /* Shift out of range leaves destination unchanged. */
5496 if (shift < 8 << size) {
5497 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
5498 vec_size, shift, &sli_op[size]);
5500 } else { /* VSHL */
5501 /* Shifts larger than the element size are
5502 * architecturally valid and result in zero.
5504 if (shift >= 8 << size) {
5505 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5506 } else {
5507 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5508 vec_size, vec_size);
5511 return 0;
5514 if (size == 3) {
5515 count = q + 1;
5516 } else {
5517 count = q ? 4: 2;
5520 /* To avoid excessive duplication of ops we implement shift
5521 * by immediate using the variable shift operations.
5523 imm = dup_const(size, shift);
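/*
 * dup_const() replicates the count (negative for the right-shift
 * forms) into every lane of a 64-bit value, e.g.
 * dup_const(MO_16, -3) is 0xfffdfffdfffdfffd, so the per-element
 * variable-shift helpers can be reused for the immediate forms.
 */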
5525 for (pass = 0; pass < count; pass++) {
5526 if (size == 3) {
5527 neon_load_reg64(cpu_V0, rm + pass);
5528 tcg_gen_movi_i64(cpu_V1, imm);
5529 switch (op) {
5530 case 2: /* VRSHR */
5531 case 3: /* VRSRA */
5532 if (u)
5533 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5534 else
5535 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5536 break;
5537 case 6: /* VQSHLU */
5538 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5539 cpu_V0, cpu_V1);
5540 break;
5541 case 7: /* VQSHL */
5542 if (u) {
5543 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5544 cpu_V0, cpu_V1);
5545 } else {
5546 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5547 cpu_V0, cpu_V1);
5549 break;
5550 default:
5551 g_assert_not_reached();
5553 if (op == 3) {
5554 /* Accumulate. */
5555 neon_load_reg64(cpu_V1, rd + pass);
5556 tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5558 neon_store_reg64(cpu_V0, rd + pass);
5559 } else { /* size < 3 */
5560 /* Operands in T0 and T1. */
5561 tmp = neon_load_reg(rm, pass);
5562 tmp2 = tcg_temp_new_i32();
5563 tcg_gen_movi_i32(tmp2, imm);
5564 switch (op) {
5565 case 2: /* VRSHR */
5566 case 3: /* VRSRA */
5567 GEN_NEON_INTEGER_OP(rshl);
5568 break;
5569 case 6: /* VQSHLU */
5570 switch (size) {
5571 case 0:
5572 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5573 tmp, tmp2);
5574 break;
5575 case 1:
5576 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5577 tmp, tmp2);
5578 break;
5579 case 2:
5580 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5581 tmp, tmp2);
5582 break;
5583 default:
5584 abort();
5586 break;
5587 case 7: /* VQSHL */
5588 GEN_NEON_INTEGER_OP_ENV(qshl);
5589 break;
5590 default:
5591 g_assert_not_reached();
5593 tcg_temp_free_i32(tmp2);
5595 if (op == 3) {
5596 /* Accumulate. */
5597 tmp2 = neon_load_reg(rd, pass);
5598 gen_neon_add(size, tmp, tmp2);
5599 tcg_temp_free_i32(tmp2);
5601 neon_store_reg(rd, pass, tmp);
5603 } /* for pass */
5604 } else if (op < 10) {
5605 /* Shift by immediate and narrow:
5606 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5607 int input_unsigned = (op == 8) ? !u : u;
5608 if (rm & 1) {
5609 return 1;
5611 shift = shift - (1 << (size + 3));
5612 size++;
5613 if (size == 3) {
5614 tmp64 = tcg_const_i64(shift);
5615 neon_load_reg64(cpu_V0, rm);
5616 neon_load_reg64(cpu_V1, rm + 1);
5617 for (pass = 0; pass < 2; pass++) {
5618 TCGv_i64 in;
5619 if (pass == 0) {
5620 in = cpu_V0;
5621 } else {
5622 in = cpu_V1;
5624 if (q) {
5625 if (input_unsigned) {
5626 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5627 } else {
5628 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5630 } else {
5631 if (input_unsigned) {
5632 gen_ushl_i64(cpu_V0, in, tmp64);
5633 } else {
5634 gen_sshl_i64(cpu_V0, in, tmp64);
5637 tmp = tcg_temp_new_i32();
5638 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5639 neon_store_reg(rd, pass, tmp);
5640 } /* for pass */
5641 tcg_temp_free_i64(tmp64);
5642 } else {
5643 if (size == 1) {
5644 imm = (uint16_t)shift;
5645 imm |= imm << 16;
5646 } else {
5647 /* size == 2 */
5648 imm = (uint32_t)shift;
5650 tmp2 = tcg_const_i32(imm);
5651 tmp4 = neon_load_reg(rm + 1, 0);
5652 tmp5 = neon_load_reg(rm + 1, 1);
5653 for (pass = 0; pass < 2; pass++) {
5654 if (pass == 0) {
5655 tmp = neon_load_reg(rm, 0);
5656 } else {
5657 tmp = tmp4;
5659 gen_neon_shift_narrow(size, tmp, tmp2, q,
5660 input_unsigned);
5661 if (pass == 0) {
5662 tmp3 = neon_load_reg(rm, 1);
5663 } else {
5664 tmp3 = tmp5;
5666 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5667 input_unsigned);
5668 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5669 tcg_temp_free_i32(tmp);
5670 tcg_temp_free_i32(tmp3);
5671 tmp = tcg_temp_new_i32();
5672 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5673 neon_store_reg(rd, pass, tmp);
5674 } /* for pass */
5675 tcg_temp_free_i32(tmp2);
5677 } else if (op == 10) {
5678 /* VSHLL, VMOVL */
5679 if (q || (rd & 1)) {
5680 return 1;
5682 tmp = neon_load_reg(rm, 0);
5683 tmp2 = neon_load_reg(rm, 1);
5684 for (pass = 0; pass < 2; pass++) {
5685 if (pass == 1)
5686 tmp = tmp2;
5688 gen_neon_widen(cpu_V0, tmp, size, u);
5690 if (shift != 0) {
5691 /* The shift is less than the width of the source
5692 type, so we can just shift the whole register. */
5693 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5694 /* Widen the result of the shift: we need to clear
5695 * the potential overflow bits resulting from
5696 * left bits of the narrow input appearing as
5697 * right bits of the left-hand neighbouring narrow
5698 * input. */
5699 if (size < 2 || !u) {
5700 uint64_t imm64;
5701 if (size == 0) {
5702 imm = (0xffu >> (8 - shift));
5703 imm |= imm << 16;
5704 } else if (size == 1) {
5705 imm = 0xffff >> (16 - shift);
5706 } else {
5707 /* size == 2 */
5708 imm = 0xffffffff >> (32 - shift);
5710 if (size < 2) {
5711 imm64 = imm | (((uint64_t)imm) << 32);
5712 } else {
5713 imm64 = imm;
5715 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5718 neon_store_reg64(cpu_V0, rd + pass);
5720 } else if (op >= 14) {
5721 /* VCVT fixed-point. */
5722 TCGv_ptr fpst;
5723 TCGv_i32 shiftv;
5724 VFPGenFixPointFn *fn;
5726 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5727 return 1;
5730 if (!(op & 1)) {
5731 if (u) {
5732 fn = gen_helper_vfp_ultos;
5733 } else {
5734 fn = gen_helper_vfp_sltos;
5736 } else {
5737 if (u) {
5738 fn = gen_helper_vfp_touls_round_to_zero;
5739 } else {
5740 fn = gen_helper_vfp_tosls_round_to_zero;
5744 /* We have already masked out the must-be-1 top bit of imm6,
5745 * hence this 32-shift where the ARM ARM has 64-imm6.
5747 shift = 32 - shift;
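/*
 * For example, a conversion with 10 fraction bits has imm6 = 54;
 * the masked field above gave shift = 22, and 32 - 22 = 10.
 */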
5748 fpst = get_fpstatus_ptr(1);
5749 shiftv = tcg_const_i32(shift);
5750 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5751 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5752 fn(tmpf, tmpf, shiftv, fpst);
5753 neon_store_reg(rd, pass, tmpf);
5755 tcg_temp_free_ptr(fpst);
5756 tcg_temp_free_i32(shiftv);
5757 } else {
5758 return 1;
5760 } else { /* (insn & 0x00380080) == 0 */
5761 int invert, reg_ofs, vec_size;
5763 if (q && (rd & 1)) {
5764 return 1;
5767 op = (insn >> 8) & 0xf;
5768 /* One register and immediate. */
5769 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
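/*
 * The 8-bit immediate abcdefgh is scattered across the encoding:
 * a is bit 24 (our 'u'), bcd are bits [18:16] and efgh bits [3:0].
 */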
5770 invert = (insn & (1 << 5)) != 0;
5771 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5772 * We choose not to special-case this and behave as if a
5773 * valid constant encoding of 0 had been given.
5775 switch (op) {
5776 case 0: case 1:
5777 /* no-op */
5778 break;
5779 case 2: case 3:
5780 imm <<= 8;
5781 break;
5782 case 4: case 5:
5783 imm <<= 16;
5784 break;
5785 case 6: case 7:
5786 imm <<= 24;
5787 break;
5788 case 8: case 9:
5789 imm |= imm << 16;
5790 break;
5791 case 10: case 11:
5792 imm = (imm << 8) | (imm << 24);
5793 break;
5794 case 12:
5795 imm = (imm << 8) | 0xff;
5796 break;
5797 case 13:
5798 imm = (imm << 16) | 0xffff;
5799 break;
5800 case 14:
5801 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5802 if (invert) {
5803 imm = ~imm;
5805 break;
5806 case 15:
5807 if (invert) {
5808 return 1;
5810 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5811 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5812 break;
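/*
 * Case 15 above is VMOV (immediate) with a floating-point constant:
 * the expression builds sign:NOT(b):Replicate(b,5):cdefgh:Zeros(19),
 * i.e. the standard single-precision VFPExpandImm() layout.
 */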
5814 if (invert) {
5815 imm = ~imm;
5818 reg_ofs = neon_reg_offset(rd, 0);
5819 vec_size = q ? 16 : 8;
5821 if (op & 1 && op < 12) {
5822 if (invert) {
5823 /* The immediate value has already been inverted,
5824 * so BIC becomes AND.
5826 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5827 vec_size, vec_size);
5828 } else {
5829 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5830 vec_size, vec_size);
5832 } else {
5833 /* VMOV, VMVN. */
5834 if (op == 14 && invert) {
5835 TCGv_i64 t64 = tcg_temp_new_i64();
5837 for (pass = 0; pass <= q; ++pass) {
5838 uint64_t val = 0;
5839 int n;
5841 for (n = 0; n < 8; n++) {
5842 if (imm & (1 << (n + pass * 8))) {
5843 val |= 0xffull << (n * 8);
5846 tcg_gen_movi_i64(t64, val);
5847 neon_store_reg64(t64, rd + pass);
5849 tcg_temp_free_i64(t64);
5850 } else {
5851 tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
5855 } else { /* (insn & 0x00800010 == 0x00800000) */
5856 if (size != 3) {
5857 op = (insn >> 8) & 0xf;
5858 if ((insn & (1 << 6)) == 0) {
5859 /* Three registers of different lengths. */
5860 int src1_wide;
5861 int src2_wide;
5862 int prewiden;
5863 /* undefreq: bit 0 : UNDEF if size == 0
5864 * bit 1 : UNDEF if size == 1
5865 * bit 2 : UNDEF if size == 2
5866 * bit 3 : UNDEF if U == 1
5867 * Note that [2:0] set implies 'always UNDEF'
5869 int undefreq;
5870 /* prewiden, src1_wide, src2_wide, undefreq */
5871 static const int neon_3reg_wide[16][4] = {
5872 {1, 0, 0, 0}, /* VADDL */
5873 {1, 1, 0, 0}, /* VADDW */
5874 {1, 0, 0, 0}, /* VSUBL */
5875 {1, 1, 0, 0}, /* VSUBW */
5876 {0, 1, 1, 0}, /* VADDHN */
5877 {0, 0, 0, 0}, /* VABAL */
5878 {0, 1, 1, 0}, /* VSUBHN */
5879 {0, 0, 0, 0}, /* VABDL */
5880 {0, 0, 0, 0}, /* VMLAL */
5881 {0, 0, 0, 9}, /* VQDMLAL */
5882 {0, 0, 0, 0}, /* VMLSL */
5883 {0, 0, 0, 9}, /* VQDMLSL */
5884 {0, 0, 0, 0}, /* Integer VMULL */
5885 {0, 0, 0, 9}, /* VQDMULL */
5886 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5887 {0, 0, 0, 7}, /* Reserved: always UNDEF */
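/*
 * For example, undefreq = 9 (VQDMLAL/VQDMLSL/VQDMULL) means UNDEF
 * when size == 0 or U == 1, and 0xa (polynomial VMULL) means UNDEF
 * when size == 1 or U == 1.
 */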
5890 prewiden = neon_3reg_wide[op][0];
5891 src1_wide = neon_3reg_wide[op][1];
5892 src2_wide = neon_3reg_wide[op][2];
5893 undefreq = neon_3reg_wide[op][3];
5895 if ((undefreq & (1 << size)) ||
5896 ((undefreq & 8) && u)) {
5897 return 1;
5899 if ((src1_wide && (rn & 1)) ||
5900 (src2_wide && (rm & 1)) ||
5901 (!src2_wide && (rd & 1))) {
5902 return 1;
5905 /* Handle polynomial VMULL in a single pass. */
5906 if (op == 14) {
5907 if (size == 0) {
5908 /* VMULL.P8 */
5909 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5910 0, gen_helper_neon_pmull_h);
5911 } else {
5912 /* VMULL.P64 */
5913 if (!dc_isar_feature(aa32_pmull, s)) {
5914 return 1;
5916 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5917 0, gen_helper_gvec_pmull_q);
5919 return 0;
5922 /* Avoid overlapping operands. Wide source operands are
5923 always aligned so will never overlap with wide
5924 destinations in problematic ways. */
5925 if (rd == rm && !src2_wide) {
5926 tmp = neon_load_reg(rm, 1);
5927 neon_store_scratch(2, tmp);
5928 } else if (rd == rn && !src1_wide) {
5929 tmp = neon_load_reg(rn, 1);
5930 neon_store_scratch(2, tmp);
5932 tmp3 = NULL;
5933 for (pass = 0; pass < 2; pass++) {
5934 if (src1_wide) {
5935 neon_load_reg64(cpu_V0, rn + pass);
5936 tmp = NULL;
5937 } else {
5938 if (pass == 1 && rd == rn) {
5939 tmp = neon_load_scratch(2);
5940 } else {
5941 tmp = neon_load_reg(rn, pass);
5943 if (prewiden) {
5944 gen_neon_widen(cpu_V0, tmp, size, u);
5947 if (src2_wide) {
5948 neon_load_reg64(cpu_V1, rm + pass);
5949 tmp2 = NULL;
5950 } else {
5951 if (pass == 1 && rd == rm) {
5952 tmp2 = neon_load_scratch(2);
5953 } else {
5954 tmp2 = neon_load_reg(rm, pass);
5956 if (prewiden) {
5957 gen_neon_widen(cpu_V1, tmp2, size, u);
5960 switch (op) {
5961 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5962 gen_neon_addl(size);
5963 break;
5964 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5965 gen_neon_subl(size);
5966 break;
5967 case 5: case 7: /* VABAL, VABDL */
5968 switch ((size << 1) | u) {
5969 case 0:
5970 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5971 break;
5972 case 1:
5973 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5974 break;
5975 case 2:
5976 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5977 break;
5978 case 3:
5979 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5980 break;
5981 case 4:
5982 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5983 break;
5984 case 5:
5985 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5986 break;
5987 default: abort();
5989 tcg_temp_free_i32(tmp2);
5990 tcg_temp_free_i32(tmp);
5991 break;
5992 case 8: case 9: case 10: case 11: case 12: case 13:
5993 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5994 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5995 break;
5996 default: /* 15 is RESERVED: caught earlier */
5997 abort();
5999 if (op == 13) {
6000 /* VQDMULL */
6001 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6002 neon_store_reg64(cpu_V0, rd + pass);
6003 } else if (op == 5 || (op >= 8 && op <= 11)) {
6004 /* Accumulate. */
6005 neon_load_reg64(cpu_V1, rd + pass);
6006 switch (op) {
6007 case 10: /* VMLSL */
6008 gen_neon_negl(cpu_V0, size);
6009 /* Fall through */
6010 case 5: case 8: /* VABAL, VMLAL */
6011 gen_neon_addl(size);
6012 break;
6013 case 9: case 11: /* VQDMLAL, VQDMLSL */
6014 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6015 if (op == 11) {
6016 gen_neon_negl(cpu_V0, size);
6018 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6019 break;
6020 default:
6021 abort();
6023 neon_store_reg64(cpu_V0, rd + pass);
6024 } else if (op == 4 || op == 6) {
6025 /* Narrowing operation. */
6026 tmp = tcg_temp_new_i32();
6027 if (!u) {
6028 switch (size) {
6029 case 0:
6030 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6031 break;
6032 case 1:
6033 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6034 break;
6035 case 2:
6036 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6037 break;
6038 default: abort();
6040 } else {
6041 switch (size) {
6042 case 0:
6043 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6044 break;
6045 case 1:
6046 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6047 break;
6048 case 2:
6049 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6050 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6051 break;
6052 default: abort();
6055 if (pass == 0) {
6056 tmp3 = tmp;
6057 } else {
6058 neon_store_reg(rd, 0, tmp3);
6059 neon_store_reg(rd, 1, tmp);
6061 } else {
6062 /* Write back the result. */
6063 neon_store_reg64(cpu_V0, rd + pass);
6066 } else {
6067 /* Two registers and a scalar. NB that for ops of this form
6068 * the ARM ARM labels bit 24 as Q, but it is in our variable
6069 * 'u', not 'q'.
6071 if (size == 0) {
6072 return 1;
6074 switch (op) {
6075 case 1: /* Floating point VMLA scalar */
6076 case 5: /* Floating point VMLS scalar */
6077 case 9: /* Floating point VMUL scalar */
6078 if (size == 1) {
6079 return 1;
6081 /* fall through */
6082 case 0: /* Integer VMLA scalar */
6083 case 4: /* Integer VMLS scalar */
6084 case 8: /* Integer VMUL scalar */
6085 case 12: /* VQDMULH scalar */
6086 case 13: /* VQRDMULH scalar */
6087 if (u && ((rd | rn) & 1)) {
6088 return 1;
6090 tmp = neon_get_scalar(size, rm);
6091 neon_store_scratch(0, tmp);
6092 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6093 tmp = neon_load_scratch(0);
6094 tmp2 = neon_load_reg(rn, pass);
6095 if (op == 12) {
6096 if (size == 1) {
6097 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6098 } else {
6099 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6101 } else if (op == 13) {
6102 if (size == 1) {
6103 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6104 } else {
6105 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6107 } else if (op & 1) {
6108 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6109 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6110 tcg_temp_free_ptr(fpstatus);
6111 } else {
6112 switch (size) {
6113 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6114 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6115 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6116 default: abort();
6119 tcg_temp_free_i32(tmp2);
6120 if (op < 8) {
6121 /* Accumulate. */
6122 tmp2 = neon_load_reg(rd, pass);
6123 switch (op) {
6124 case 0:
6125 gen_neon_add(size, tmp, tmp2);
6126 break;
6127 case 1:
6129 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6130 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6131 tcg_temp_free_ptr(fpstatus);
6132 break;
6134 case 4:
6135 gen_neon_rsb(size, tmp, tmp2);
6136 break;
6137 case 5:
6139 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6140 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6141 tcg_temp_free_ptr(fpstatus);
6142 break;
6144 default:
6145 abort();
6147 tcg_temp_free_i32(tmp2);
6149 neon_store_reg(rd, pass, tmp);
6151 break;
6152 case 3: /* VQDMLAL scalar */
6153 case 7: /* VQDMLSL scalar */
6154 case 11: /* VQDMULL scalar */
6155 if (u == 1) {
6156 return 1;
6158 /* fall through */
6159 case 2: /* VMLAL scalar */
6160 case 6: /* VMLSL scalar */
6161 case 10: /* VMULL scalar */
6162 if (rd & 1) {
6163 return 1;
6165 tmp2 = neon_get_scalar(size, rm);
6166 /* We need a copy of tmp2 because gen_neon_mull
6167 * frees it during pass 0. */
6168 tmp4 = tcg_temp_new_i32();
6169 tcg_gen_mov_i32(tmp4, tmp2);
6170 tmp3 = neon_load_reg(rn, 1);
6172 for (pass = 0; pass < 2; pass++) {
6173 if (pass == 0) {
6174 tmp = neon_load_reg(rn, 0);
6175 } else {
6176 tmp = tmp3;
6177 tmp2 = tmp4;
6179 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6180 if (op != 11) {
6181 neon_load_reg64(cpu_V1, rd + pass);
6183 switch (op) {
6184 case 6:
6185 gen_neon_negl(cpu_V0, size);
6186 /* Fall through */
6187 case 2:
6188 gen_neon_addl(size);
6189 break;
6190 case 3: case 7:
6191 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6192 if (op == 7) {
6193 gen_neon_negl(cpu_V0, size);
6195 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6196 break;
6197 case 10:
6198 /* no-op */
6199 break;
6200 case 11:
6201 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6202 break;
6203 default:
6204 abort();
6206 neon_store_reg64(cpu_V0, rd + pass);
6208 break;
6209 case 14: /* VQRDMLAH scalar */
6210 case 15: /* VQRDMLSH scalar */
6212 NeonGenThreeOpEnvFn *fn;
6214 if (!dc_isar_feature(aa32_rdm, s)) {
6215 return 1;
6217 if (u && ((rd | rn) & 1)) {
6218 return 1;
6220 if (op == 14) {
6221 if (size == 1) {
6222 fn = gen_helper_neon_qrdmlah_s16;
6223 } else {
6224 fn = gen_helper_neon_qrdmlah_s32;
6226 } else {
6227 if (size == 1) {
6228 fn = gen_helper_neon_qrdmlsh_s16;
6229 } else {
6230 fn = gen_helper_neon_qrdmlsh_s32;
6234 tmp2 = neon_get_scalar(size, rm);
6235 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6236 tmp = neon_load_reg(rn, pass);
6237 tmp3 = neon_load_reg(rd, pass);
6238 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6239 tcg_temp_free_i32(tmp3);
6240 neon_store_reg(rd, pass, tmp);
6242 tcg_temp_free_i32(tmp2);
6244 break;
6245 default:
6246 g_assert_not_reached();
6249 } else { /* size == 3 */
6250 if (!u) {
6251 /* Extract. */
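/*
 * VEXT: the result is bytes [imm, imm + 7] (or [imm, imm + 15] for the
 * quad form) of the concatenation <Vm:Vn>, with Vn supplying the low
 * bytes.  The 64-bit shift/OR sequences below assemble that extraction
 * one doubleword at a time.
 */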
6252 imm = (insn >> 8) & 0xf;
6254 if (imm > 7 && !q)
6255 return 1;
6257 if (q && ((rd | rn | rm) & 1)) {
6258 return 1;
6261 if (imm == 0) {
6262 neon_load_reg64(cpu_V0, rn);
6263 if (q) {
6264 neon_load_reg64(cpu_V1, rn + 1);
6266 } else if (imm == 8) {
6267 neon_load_reg64(cpu_V0, rn + 1);
6268 if (q) {
6269 neon_load_reg64(cpu_V1, rm);
6271 } else if (q) {
6272 tmp64 = tcg_temp_new_i64();
6273 if (imm < 8) {
6274 neon_load_reg64(cpu_V0, rn);
6275 neon_load_reg64(tmp64, rn + 1);
6276 } else {
6277 neon_load_reg64(cpu_V0, rn + 1);
6278 neon_load_reg64(tmp64, rm);
6280 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6281 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6282 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6283 if (imm < 8) {
6284 neon_load_reg64(cpu_V1, rm);
6285 } else {
6286 neon_load_reg64(cpu_V1, rm + 1);
6287 imm -= 8;
6289 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6290 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6291 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6292 tcg_temp_free_i64(tmp64);
6293 } else {
6294 /* BUGFIX */
6295 neon_load_reg64(cpu_V0, rn);
6296 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6297 neon_load_reg64(cpu_V1, rm);
6298 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6299 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6301 neon_store_reg64(cpu_V0, rd);
6302 if (q) {
6303 neon_store_reg64(cpu_V1, rd + 1);
6305 } else if ((insn & (1 << 11)) == 0) {
6306 /* Two register misc. */
6307 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6308 size = (insn >> 18) & 3;
6309 /* UNDEF for unknown op values and bad op-size combinations */
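/* neon_2rm_sizes[op] is a bitmask of the element sizes valid for this op. */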
6310 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6311 return 1;
6313 if (neon_2rm_is_v8_op(op) &&
6314 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6315 return 1;
6317 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6318 q && ((rm | rd) & 1)) {
6319 return 1;
6321 switch (op) {
6322 case NEON_2RM_VREV64:
6323 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6324 tmp = neon_load_reg(rm, pass * 2);
6325 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6326 switch (size) {
6327 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6328 case 1: gen_swap_half(tmp); break;
6329 case 2: /* no-op */ break;
6330 default: abort();
6332 neon_store_reg(rd, pass * 2 + 1, tmp);
6333 if (size == 2) {
6334 neon_store_reg(rd, pass * 2, tmp2);
6335 } else {
6336 switch (size) {
6337 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6338 case 1: gen_swap_half(tmp2); break;
6339 default: abort();
6341 neon_store_reg(rd, pass * 2, tmp2);
6344 break;
6345 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6346 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6347 for (pass = 0; pass < q + 1; pass++) {
6348 tmp = neon_load_reg(rm, pass * 2);
6349 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6350 tmp = neon_load_reg(rm, pass * 2 + 1);
6351 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6352 switch (size) {
6353 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6354 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6355 case 2: tcg_gen_add_i64(CPU_V001); break;
6356 default: abort();
6358 if (op >= NEON_2RM_VPADAL) {
6359 /* Accumulate. */
6360 neon_load_reg64(cpu_V1, rd + pass);
6361 gen_neon_addl(size);
6363 neon_store_reg64(cpu_V0, rd + pass);
6365 break;
6366 case NEON_2RM_VTRN:
6367 if (size == 2) {
6368 int n;
6369 for (n = 0; n < (q ? 4 : 2); n += 2) {
6370 tmp = neon_load_reg(rm, n);
6371 tmp2 = neon_load_reg(rd, n + 1);
6372 neon_store_reg(rm, n, tmp2);
6373 neon_store_reg(rd, n + 1, tmp);
6375 } else {
6376 goto elementwise;
6378 break;
6379 case NEON_2RM_VUZP:
6380 if (gen_neon_unzip(rd, rm, size, q)) {
6381 return 1;
6383 break;
6384 case NEON_2RM_VZIP:
6385 if (gen_neon_zip(rd, rm, size, q)) {
6386 return 1;
6388 break;
6389 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6390 /* also VQMOVUN; op field and mnemonics don't line up */
6391 if (rm & 1) {
6392 return 1;
6394 tmp2 = NULL;
6395 for (pass = 0; pass < 2; pass++) {
6396 neon_load_reg64(cpu_V0, rm + pass);
6397 tmp = tcg_temp_new_i32();
6398 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6399 tmp, cpu_V0);
6400 if (pass == 0) {
6401 tmp2 = tmp;
6402 } else {
6403 neon_store_reg(rd, 0, tmp2);
6404 neon_store_reg(rd, 1, tmp);
6407 break;
6408 case NEON_2RM_VSHLL:
6409 if (q || (rd & 1)) {
6410 return 1;
6412 tmp = neon_load_reg(rm, 0);
6413 tmp2 = neon_load_reg(rm, 1);
6414 for (pass = 0; pass < 2; pass++) {
6415 if (pass == 1)
6416 tmp = tmp2;
6417 gen_neon_widen(cpu_V0, tmp, size, 1);
6418 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6419 neon_store_reg64(cpu_V0, rd + pass);
6421 break;
6422 case NEON_2RM_VCVT_F16_F32:
6424 TCGv_ptr fpst;
6425 TCGv_i32 ahp;
6427 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6428 q || (rm & 1)) {
6429 return 1;
6431 fpst = get_fpstatus_ptr(true);
6432 ahp = get_ahp_flag();
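/*
 * Narrow the four f32 elements of the source Q register to f16 and pack
 * them pairwise: word 0 of the destination D register gets
 * f16(e1):f16(e0), word 1 gets f16(e3):f16(e2).
 */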
6433 tmp = neon_load_reg(rm, 0);
6434 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6435 tmp2 = neon_load_reg(rm, 1);
6436 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6437 tcg_gen_shli_i32(tmp2, tmp2, 16);
6438 tcg_gen_or_i32(tmp2, tmp2, tmp);
6439 tcg_temp_free_i32(tmp);
6440 tmp = neon_load_reg(rm, 2);
6441 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6442 tmp3 = neon_load_reg(rm, 3);
6443 neon_store_reg(rd, 0, tmp2);
6444 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6445 tcg_gen_shli_i32(tmp3, tmp3, 16);
6446 tcg_gen_or_i32(tmp3, tmp3, tmp);
6447 neon_store_reg(rd, 1, tmp3);
6448 tcg_temp_free_i32(tmp);
6449 tcg_temp_free_i32(ahp);
6450 tcg_temp_free_ptr(fpst);
6451 break;
6453 case NEON_2RM_VCVT_F32_F16:
6455 TCGv_ptr fpst;
6456 TCGv_i32 ahp;
6457 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6458 q || (rd & 1)) {
6459 return 1;
6461 fpst = get_fpstatus_ptr(true);
6462 ahp = get_ahp_flag();
6463 tmp3 = tcg_temp_new_i32();
6464 tmp = neon_load_reg(rm, 0);
6465 tmp2 = neon_load_reg(rm, 1);
6466 tcg_gen_ext16u_i32(tmp3, tmp);
6467 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6468 neon_store_reg(rd, 0, tmp3);
6469 tcg_gen_shri_i32(tmp, tmp, 16);
6470 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6471 neon_store_reg(rd, 1, tmp);
6472 tmp3 = tcg_temp_new_i32();
6473 tcg_gen_ext16u_i32(tmp3, tmp2);
6474 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6475 neon_store_reg(rd, 2, tmp3);
6476 tcg_gen_shri_i32(tmp2, tmp2, 16);
6477 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6478 neon_store_reg(rd, 3, tmp2);
6479 tcg_temp_free_i32(ahp);
6480 tcg_temp_free_ptr(fpst);
6481 break;
6483 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6484 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6485 return 1;
6487 ptr1 = vfp_reg_ptr(true, rd);
6488 ptr2 = vfp_reg_ptr(true, rm);
6490 /* Bit 6 is the lowest opcode bit; it distinguishes between
6491 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6493 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6495 if (op == NEON_2RM_AESE) {
6496 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6497 } else {
6498 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6500 tcg_temp_free_ptr(ptr1);
6501 tcg_temp_free_ptr(ptr2);
6502 tcg_temp_free_i32(tmp3);
6503 break;
6504 case NEON_2RM_SHA1H:
6505 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6506 return 1;
6508 ptr1 = vfp_reg_ptr(true, rd);
6509 ptr2 = vfp_reg_ptr(true, rm);
6511 gen_helper_crypto_sha1h(ptr1, ptr2);
6513 tcg_temp_free_ptr(ptr1);
6514 tcg_temp_free_ptr(ptr2);
6515 break;
6516 case NEON_2RM_SHA1SU1:
6517 if ((rm | rd) & 1) {
6518 return 1;
6520 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6521 if (q) {
6522 if (!dc_isar_feature(aa32_sha2, s)) {
6523 return 1;
6525 } else if (!dc_isar_feature(aa32_sha1, s)) {
6526 return 1;
6528 ptr1 = vfp_reg_ptr(true, rd);
6529 ptr2 = vfp_reg_ptr(true, rm);
6530 if (q) {
6531 gen_helper_crypto_sha256su0(ptr1, ptr2);
6532 } else {
6533 gen_helper_crypto_sha1su1(ptr1, ptr2);
6535 tcg_temp_free_ptr(ptr1);
6536 tcg_temp_free_ptr(ptr2);
6537 break;
6539 case NEON_2RM_VMVN:
6540 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6541 break;
6542 case NEON_2RM_VNEG:
6543 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6544 break;
6545 case NEON_2RM_VABS:
6546 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6547 break;
6549 case NEON_2RM_VCEQ0:
6550 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6551 vec_size, &ceq0_op[size]);
6552 break;
6553 case NEON_2RM_VCGT0:
6554 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6555 vec_size, &cgt0_op[size]);
6556 break;
6557 case NEON_2RM_VCLE0:
6558 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6559 vec_size, &cle0_op[size]);
6560 break;
6561 case NEON_2RM_VCGE0:
6562 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6563 vec_size, &cge0_op[size]);
6564 break;
6565 case NEON_2RM_VCLT0:
6566 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6567 vec_size, &clt0_op[size]);
6568 break;
6570 default:
6571 elementwise:
6572 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6573 tmp = neon_load_reg(rm, pass);
6574 switch (op) {
6575 case NEON_2RM_VREV32:
6576 switch (size) {
6577 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6578 case 1: gen_swap_half(tmp); break;
6579 default: abort();
6581 break;
6582 case NEON_2RM_VREV16:
6583 gen_rev16(tmp, tmp);
6584 break;
6585 case NEON_2RM_VCLS:
6586 switch (size) {
6587 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6588 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6589 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6590 default: abort();
6592 break;
6593 case NEON_2RM_VCLZ:
6594 switch (size) {
6595 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6596 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6597 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6598 default: abort();
6600 break;
6601 case NEON_2RM_VCNT:
6602 gen_helper_neon_cnt_u8(tmp, tmp);
6603 break;
6604 case NEON_2RM_VQABS:
6605 switch (size) {
6606 case 0:
6607 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6608 break;
6609 case 1:
6610 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6611 break;
6612 case 2:
6613 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6614 break;
6615 default: abort();
6617 break;
6618 case NEON_2RM_VQNEG:
6619 switch (size) {
6620 case 0:
6621 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6622 break;
6623 case 1:
6624 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6625 break;
6626 case 2:
6627 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6628 break;
6629 default: abort();
6631 break;
6632 case NEON_2RM_VCGT0_F:
6634 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6635 tmp2 = tcg_const_i32(0);
6636 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6637 tcg_temp_free_i32(tmp2);
6638 tcg_temp_free_ptr(fpstatus);
6639 break;
6641 case NEON_2RM_VCGE0_F:
6643 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6644 tmp2 = tcg_const_i32(0);
6645 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6646 tcg_temp_free_i32(tmp2);
6647 tcg_temp_free_ptr(fpstatus);
6648 break;
6650 case NEON_2RM_VCEQ0_F:
6652 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6653 tmp2 = tcg_const_i32(0);
6654 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6655 tcg_temp_free_i32(tmp2);
6656 tcg_temp_free_ptr(fpstatus);
6657 break;
6659 case NEON_2RM_VCLE0_F:
6661 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6662 tmp2 = tcg_const_i32(0);
6663 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6664 tcg_temp_free_i32(tmp2);
6665 tcg_temp_free_ptr(fpstatus);
6666 break;
6668 case NEON_2RM_VCLT0_F:
6670 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6671 tmp2 = tcg_const_i32(0);
6672 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6673 tcg_temp_free_i32(tmp2);
6674 tcg_temp_free_ptr(fpstatus);
6675 break;
6677 case NEON_2RM_VABS_F:
6678 gen_helper_vfp_abss(tmp, tmp);
6679 break;
6680 case NEON_2RM_VNEG_F:
6681 gen_helper_vfp_negs(tmp, tmp);
6682 break;
6683 case NEON_2RM_VSWP:
6684 tmp2 = neon_load_reg(rd, pass);
6685 neon_store_reg(rm, pass, tmp2);
6686 break;
6687 case NEON_2RM_VTRN:
6688 tmp2 = neon_load_reg(rd, pass);
6689 switch (size) {
6690 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6691 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6692 default: abort();
6694 neon_store_reg(rm, pass, tmp2);
6695 break;
6696 case NEON_2RM_VRINTN:
6697 case NEON_2RM_VRINTA:
6698 case NEON_2RM_VRINTM:
6699 case NEON_2RM_VRINTP:
6700 case NEON_2RM_VRINTZ:
6702 TCGv_i32 tcg_rmode;
6703 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6704 int rmode;
6706 if (op == NEON_2RM_VRINTZ) {
6707 rmode = FPROUNDING_ZERO;
6708 } else {
6709 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6712 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6713 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6714 cpu_env);
6715 gen_helper_rints(tmp, tmp, fpstatus);
6716 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6717 cpu_env);
6718 tcg_temp_free_ptr(fpstatus);
6719 tcg_temp_free_i32(tcg_rmode);
6720 break;
6722 case NEON_2RM_VRINTX:
6724 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6725 gen_helper_rints_exact(tmp, tmp, fpstatus);
6726 tcg_temp_free_ptr(fpstatus);
6727 break;
6729 case NEON_2RM_VCVTAU:
6730 case NEON_2RM_VCVTAS:
6731 case NEON_2RM_VCVTNU:
6732 case NEON_2RM_VCVTNS:
6733 case NEON_2RM_VCVTPU:
6734 case NEON_2RM_VCVTPS:
6735 case NEON_2RM_VCVTMU:
6736 case NEON_2RM_VCVTMS:
6738 bool is_signed = !extract32(insn, 7, 1);
6739 TCGv_ptr fpst = get_fpstatus_ptr(1);
6740 TCGv_i32 tcg_rmode, tcg_shift;
6741 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6743 tcg_shift = tcg_const_i32(0);
6744 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6745 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6746 cpu_env);
6748 if (is_signed) {
6749 gen_helper_vfp_tosls(tmp, tmp,
6750 tcg_shift, fpst);
6751 } else {
6752 gen_helper_vfp_touls(tmp, tmp,
6753 tcg_shift, fpst);
6756 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6757 cpu_env);
6758 tcg_temp_free_i32(tcg_rmode);
6759 tcg_temp_free_i32(tcg_shift);
6760 tcg_temp_free_ptr(fpst);
6761 break;
6763 case NEON_2RM_VRECPE:
6765 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6766 gen_helper_recpe_u32(tmp, tmp, fpstatus);
6767 tcg_temp_free_ptr(fpstatus);
6768 break;
6770 case NEON_2RM_VRSQRTE:
6772 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6773 gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6774 tcg_temp_free_ptr(fpstatus);
6775 break;
6777 case NEON_2RM_VRECPE_F:
6779 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6780 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6781 tcg_temp_free_ptr(fpstatus);
6782 break;
6784 case NEON_2RM_VRSQRTE_F:
6786 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6787 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6788 tcg_temp_free_ptr(fpstatus);
6789 break;
6791 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6793 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6794 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6795 tcg_temp_free_ptr(fpstatus);
6796 break;
6798 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6800 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6801 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6802 tcg_temp_free_ptr(fpstatus);
6803 break;
6805 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6807 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6808 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6809 tcg_temp_free_ptr(fpstatus);
6810 break;
6812 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6814 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6815 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6816 tcg_temp_free_ptr(fpstatus);
6817 break;
6819 default:
6820 /* Reserved op values were caught by the
6821 * neon_2rm_sizes[] check earlier.
6823 abort();
6825 neon_store_reg(rd, pass, tmp);
6827 break;
6829 } else if ((insn & (1 << 10)) == 0) {
6830 /* VTBL, VTBX. */
6831 int n = ((insn >> 8) & 3) + 1;
6832 if ((rn + n) > 32) {
6833 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6834 * helper function running off the end of the register file.
6836 return 1;
6838 n <<= 3;
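/* n is now the table size in bytes (each D register contributes 8). */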
6839 if (insn & (1 << 6)) {
6840 tmp = neon_load_reg(rd, 0);
6841 } else {
6842 tmp = tcg_temp_new_i32();
6843 tcg_gen_movi_i32(tmp, 0);
6845 tmp2 = neon_load_reg(rm, 0);
6846 ptr1 = vfp_reg_ptr(true, rn);
6847 tmp5 = tcg_const_i32(n);
6848 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6849 tcg_temp_free_i32(tmp);
6850 if (insn & (1 << 6)) {
6851 tmp = neon_load_reg(rd, 1);
6852 } else {
6853 tmp = tcg_temp_new_i32();
6854 tcg_gen_movi_i32(tmp, 0);
6856 tmp3 = neon_load_reg(rm, 1);
6857 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6858 tcg_temp_free_i32(tmp5);
6859 tcg_temp_free_ptr(ptr1);
6860 neon_store_reg(rd, 0, tmp2);
6861 neon_store_reg(rd, 1, tmp3);
6862 tcg_temp_free_i32(tmp);
6863 } else if ((insn & 0x380) == 0) {
6864 /* VDUP */
6865 int element;
6866 MemOp size;
6868 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6869 return 1;
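/*
 * The scalar is selected by insn[19:16]: the position of the lowest set
 * bit (of bits [18:16]) gives the element size, and the bits above it
 * give the element index.
 */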
6871 if (insn & (1 << 16)) {
6872 size = MO_8;
6873 element = (insn >> 17) & 7;
6874 } else if (insn & (1 << 17)) {
6875 size = MO_16;
6876 element = (insn >> 18) & 3;
6877 } else {
6878 size = MO_32;
6879 element = (insn >> 19) & 1;
6881 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6882 neon_element_offset(rm, element, size),
6883 q ? 16 : 8, q ? 16 : 8);
6884 } else {
6885 return 1;
6889 return 0;
6892 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6894 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6895 const ARMCPRegInfo *ri;
6897 cpnum = (insn >> 8) & 0xf;
6899 /* First check for coprocessor space used for XScale/iwMMXt insns */
6900 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6901 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6902 return 1;
6904 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6905 return disas_iwmmxt_insn(s, insn);
6906 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6907 return disas_dsp_insn(s, insn);
6909 return 1;
6912 /* Otherwise treat as a generic register access */
6913 is64 = (insn & (1 << 25)) == 0;
6914 if (!is64 && ((insn & (1 << 4)) == 0)) {
6915 /* cdp */
6916 return 1;
6919 crm = insn & 0xf;
6920 if (is64) {
6921 crn = 0;
6922 opc1 = (insn >> 4) & 0xf;
6923 opc2 = 0;
6924 rt2 = (insn >> 16) & 0xf;
6925 } else {
6926 crn = (insn >> 16) & 0xf;
6927 opc1 = (insn >> 21) & 7;
6928 opc2 = (insn >> 5) & 7;
6929 rt2 = 0;
6931 isread = (insn >> 20) & 1;
6932 rt = (insn >> 12) & 0xf;
6934 ri = get_arm_cp_reginfo(s->cp_regs,
6935 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6936 if (ri) {
6937 bool need_exit_tb;
6939 /* Check access permissions */
6940 if (!cp_access_ok(s->current_el, ri, isread)) {
6941 return 1;
6944 if (s->hstr_active || ri->accessfn ||
6945 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6946 /* Emit code to perform further access permissions checks at
6947 * runtime; this may result in an exception.
6948 * Note that on XScale all cp0..c13 registers do an access check
6949 * call in order to handle c15_cpar.
6951 TCGv_ptr tmpptr;
6952 TCGv_i32 tcg_syn, tcg_isread;
6953 uint32_t syndrome;
6955 /* Note that since we are an implementation which takes an
6956 * exception on a trapped conditional instruction only if the
6957 * instruction passes its condition code check, we can take
6958 * advantage of the clause in the ARM ARM that allows us to set
6959 * the COND field in the instruction to 0xE in all cases.
6960 * We could fish the actual condition out of the insn (ARM)
6961 * or the condexec bits (Thumb) but it isn't necessary.
6963 switch (cpnum) {
6964 case 14:
6965 if (is64) {
6966 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6967 isread, false);
6968 } else {
6969 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6970 rt, isread, false);
6972 break;
6973 case 15:
6974 if (is64) {
6975 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6976 isread, false);
6977 } else {
6978 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6979 rt, isread, false);
6981 break;
6982 default:
6983 /* ARMv8 defines that only coprocessors 14 and 15 exist,
6984 * so this can only happen if this is an ARMv7 or earlier CPU,
6985 * in which case the syndrome information won't actually be
6986 * guest visible.
6988 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6989 syndrome = syn_uncategorized();
6990 break;
6993 gen_set_condexec(s);
6994 gen_set_pc_im(s, s->pc_curr);
6995 tmpptr = tcg_const_ptr(ri);
6996 tcg_syn = tcg_const_i32(syndrome);
6997 tcg_isread = tcg_const_i32(isread);
6998 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6999 tcg_isread);
7000 tcg_temp_free_ptr(tmpptr);
7001 tcg_temp_free_i32(tcg_syn);
7002 tcg_temp_free_i32(tcg_isread);
7003 } else if (ri->type & ARM_CP_RAISES_EXC) {
7005 * The readfn or writefn might raise an exception;
7006 * synchronize the CPU state in case it does.
7008 gen_set_condexec(s);
7009 gen_set_pc_im(s, s->pc_curr);
7012 /* Handle special cases first */
7013 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7014 case ARM_CP_NOP:
7015 return 0;
7016 case ARM_CP_WFI:
7017 if (isread) {
7018 return 1;
7020 gen_set_pc_im(s, s->base.pc_next);
7021 s->base.is_jmp = DISAS_WFI;
7022 return 0;
7023 default:
7024 break;
7027 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7028 gen_io_start();
7031 if (isread) {
7032 /* Read */
7033 if (is64) {
7034 TCGv_i64 tmp64;
7035 TCGv_i32 tmp;
7036 if (ri->type & ARM_CP_CONST) {
7037 tmp64 = tcg_const_i64(ri->resetvalue);
7038 } else if (ri->readfn) {
7039 TCGv_ptr tmpptr;
7040 tmp64 = tcg_temp_new_i64();
7041 tmpptr = tcg_const_ptr(ri);
7042 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7043 tcg_temp_free_ptr(tmpptr);
7044 } else {
7045 tmp64 = tcg_temp_new_i64();
7046 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7048 tmp = tcg_temp_new_i32();
7049 tcg_gen_extrl_i64_i32(tmp, tmp64);
7050 store_reg(s, rt, tmp);
7051 tmp = tcg_temp_new_i32();
7052 tcg_gen_extrh_i64_i32(tmp, tmp64);
7053 tcg_temp_free_i64(tmp64);
7054 store_reg(s, rt2, tmp);
7055 } else {
7056 TCGv_i32 tmp;
7057 if (ri->type & ARM_CP_CONST) {
7058 tmp = tcg_const_i32(ri->resetvalue);
7059 } else if (ri->readfn) {
7060 TCGv_ptr tmpptr;
7061 tmp = tcg_temp_new_i32();
7062 tmpptr = tcg_const_ptr(ri);
7063 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7064 tcg_temp_free_ptr(tmpptr);
7065 } else {
7066 tmp = load_cpu_offset(ri->fieldoffset);
7068 if (rt == 15) {
7069 /* Destination register of r15 for 32 bit loads sets
7070 * the condition codes from the high 4 bits of the value
7072 gen_set_nzcv(tmp);
7073 tcg_temp_free_i32(tmp);
7074 } else {
7075 store_reg(s, rt, tmp);
7078 } else {
7079 /* Write */
7080 if (ri->type & ARM_CP_CONST) {
7081 /* If not forbidden by access permissions, treat as WI */
7082 return 0;
7085 if (is64) {
7086 TCGv_i32 tmplo, tmphi;
7087 TCGv_i64 tmp64 = tcg_temp_new_i64();
7088 tmplo = load_reg(s, rt);
7089 tmphi = load_reg(s, rt2);
7090 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7091 tcg_temp_free_i32(tmplo);
7092 tcg_temp_free_i32(tmphi);
7093 if (ri->writefn) {
7094 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7095 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7096 tcg_temp_free_ptr(tmpptr);
7097 } else {
7098 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7100 tcg_temp_free_i64(tmp64);
7101 } else {
7102 if (ri->writefn) {
7103 TCGv_i32 tmp;
7104 TCGv_ptr tmpptr;
7105 tmp = load_reg(s, rt);
7106 tmpptr = tcg_const_ptr(ri);
7107 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7108 tcg_temp_free_ptr(tmpptr);
7109 tcg_temp_free_i32(tmp);
7110 } else {
7111 TCGv_i32 tmp = load_reg(s, rt);
7112 store_cpu_offset(tmp, ri->fieldoffset);
7117 /* I/O operations must end the TB here (whether read or write) */
7118 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7119 (ri->type & ARM_CP_IO));
7121 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7123 * A write to any coprocessor register that ends a TB
7124 * must rebuild the hflags for the next TB.
7126 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7127 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7128 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7129 } else {
7130 if (ri->type & ARM_CP_NEWEL) {
7131 gen_helper_rebuild_hflags_a32_newel(cpu_env);
7132 } else {
7133 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7136 tcg_temp_free_i32(tcg_el);
7138 * We default to ending the TB on a coprocessor register write,
7139 * but allow this to be suppressed by the register definition
7140 * (usually only necessary to work around guest bugs).
7142 need_exit_tb = true;
7144 if (need_exit_tb) {
7145 gen_lookup_tb(s);
7148 return 0;
7151 /* Unknown register; this might be a guest error or a QEMU
7152 * unimplemented feature.
7154 if (is64) {
7155 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7156 "64 bit system register cp:%d opc1: %d crm:%d "
7157 "(%s)\n",
7158 isread ? "read" : "write", cpnum, opc1, crm,
7159 s->ns ? "non-secure" : "secure");
7160 } else {
7161 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7162 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7163 "(%s)\n",
7164 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7165 s->ns ? "non-secure" : "secure");
7168 return 1;
7172 /* Store a 64-bit value to a register pair. Clobbers val. */
7173 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7175 TCGv_i32 tmp;
7176 tmp = tcg_temp_new_i32();
7177 tcg_gen_extrl_i64_i32(tmp, val);
7178 store_reg(s, rlow, tmp);
7179 tmp = tcg_temp_new_i32();
7180 tcg_gen_extrh_i64_i32(tmp, val);
7181 store_reg(s, rhigh, tmp);
7184 /* load and add a 64-bit value from a register pair. */
7185 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7187 TCGv_i64 tmp;
7188 TCGv_i32 tmpl;
7189 TCGv_i32 tmph;
7191 /* Load 64-bit value rd:rn. */
7192 tmpl = load_reg(s, rlow);
7193 tmph = load_reg(s, rhigh);
7194 tmp = tcg_temp_new_i64();
7195 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7196 tcg_temp_free_i32(tmpl);
7197 tcg_temp_free_i32(tmph);
7198 tcg_gen_add_i64(val, val, tmp);
7199 tcg_temp_free_i64(tmp);
7202 /* Set N and Z flags from hi|lo. */
7203 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7205 tcg_gen_mov_i32(cpu_NF, hi);
7206 tcg_gen_or_i32(cpu_ZF, lo, hi);
7209 /* Load/Store exclusive instructions are implemented by remembering
7210 the value/address loaded, and seeing if these are the same
7211 when the store is performed. This should be sufficient to implement
7212 the architecturally mandated semantics, and avoids having to monitor
7213 regular stores. The compare vs the remembered value is done during
7214 the cmpxchg operation, but we must compare the addresses manually. */
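/*
 * A sketch of the generated behaviour (not literal code):
 *
 *   LDREX: exclusive_addr = addr; exclusive_val = [addr]; Rt = exclusive_val;
 *   STREX: if (addr == exclusive_addr &&
 *              cmpxchg(&[addr], exclusive_val, Rt) stored the new value) {
 *              Rd = 0;
 *          } else {
 *              Rd = 1;
 *          }
 *          exclusive_addr = -1;
 */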
7215 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7216 TCGv_i32 addr, int size)
7218 TCGv_i32 tmp = tcg_temp_new_i32();
7219 MemOp opc = size | MO_ALIGN | s->be_data;
7221 s->is_ldex = true;
7223 if (size == 3) {
7224 TCGv_i32 tmp2 = tcg_temp_new_i32();
7225 TCGv_i64 t64 = tcg_temp_new_i64();
7227 /* For AArch32, architecturally the 32-bit word at the lowest
7228 * address is always Rt and the one at addr+4 is Rt2, even if
7229 * the CPU is big-endian. That means we don't want to do a
7230 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7231 * for an architecturally 64-bit access, but instead do a
7232 * 64-bit access using MO_BE if appropriate and then split
7233 * the two halves.
7234 * This only makes a difference for BE32 user-mode, where
7235 * frob64() must not flip the two halves of the 64-bit data
7236 * but this code must treat BE32 user-mode like BE32 system.
7238 TCGv taddr = gen_aa32_addr(s, addr, opc);
7240 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7241 tcg_temp_free(taddr);
7242 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7243 if (s->be_data == MO_BE) {
7244 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7245 } else {
7246 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7248 tcg_temp_free_i64(t64);
7250 store_reg(s, rt2, tmp2);
7251 } else {
7252 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7253 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7256 store_reg(s, rt, tmp);
7257 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7260 static void gen_clrex(DisasContext *s)
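/*
 * -1 can never match a zero-extended 32-bit address, so this makes any
 * subsequent STREX fail.
 */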
7262 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7265 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7266 TCGv_i32 addr, int size)
7268 TCGv_i32 t0, t1, t2;
7269 TCGv_i64 extaddr;
7270 TCGv taddr;
7271 TCGLabel *done_label;
7272 TCGLabel *fail_label;
7273 MemOp opc = size | MO_ALIGN | s->be_data;
7275 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7276 [addr] = {Rt};
7277 {Rd} = 0;
7278 } else {
7279 {Rd} = 1;
7280 } */
7281 fail_label = gen_new_label();
7282 done_label = gen_new_label();
7283 extaddr = tcg_temp_new_i64();
7284 tcg_gen_extu_i32_i64(extaddr, addr);
7285 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7286 tcg_temp_free_i64(extaddr);
7288 taddr = gen_aa32_addr(s, addr, opc);
7289 t0 = tcg_temp_new_i32();
7290 t1 = load_reg(s, rt);
7291 if (size == 3) {
7292 TCGv_i64 o64 = tcg_temp_new_i64();
7293 TCGv_i64 n64 = tcg_temp_new_i64();
7295 t2 = load_reg(s, rt2);
7296 /* For AArch32, architecturally the 32-bit word at the lowest
7297 * address is always Rt and the one at addr+4 is Rt2, even if
7298 * the CPU is big-endian. Since we're going to treat this as a
7299 * single 64-bit BE store, we need to put the two halves in the
7300 * opposite order for BE to LE, so that they end up in the right
7301 * places.
7302 * We don't want gen_aa32_frob64() because that does the wrong
7303 * thing for BE32 usermode.
7305 if (s->be_data == MO_BE) {
7306 tcg_gen_concat_i32_i64(n64, t2, t1);
7307 } else {
7308 tcg_gen_concat_i32_i64(n64, t1, t2);
7310 tcg_temp_free_i32(t2);
7312 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7313 get_mem_index(s), opc);
7314 tcg_temp_free_i64(n64);
7316 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7317 tcg_gen_extrl_i64_i32(t0, o64);
7319 tcg_temp_free_i64(o64);
7320 } else {
7321 t2 = tcg_temp_new_i32();
7322 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7323 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7324 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7325 tcg_temp_free_i32(t2);
7327 tcg_temp_free_i32(t1);
7328 tcg_temp_free(taddr);
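/*
 * In both branches t0 is 0 if the exclusive store succeeded and 1 if it
 * failed; this is the STREX status value written to Rd.
 */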
7329 tcg_gen_mov_i32(cpu_R[rd], t0);
7330 tcg_temp_free_i32(t0);
7331 tcg_gen_br(done_label);
7333 gen_set_label(fail_label);
7334 tcg_gen_movi_i32(cpu_R[rd], 1);
7335 gen_set_label(done_label);
7336 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7339 /* gen_srs:
7340 * @env: CPUARMState
7341 * @s: DisasContext
7342 * @mode: mode field from insn (which stack to store to)
7343 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7344 * @writeback: true if writeback bit set
7346 * Generate code for the SRS (Store Return State) insn.
7348 static void gen_srs(DisasContext *s,
7349 uint32_t mode, uint32_t amode, bool writeback)
7351 int32_t offset;
7352 TCGv_i32 addr, tmp;
7353 bool undef = false;
7355 /* SRS is:
7356 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7357 * and specified mode is monitor mode
7358 * - UNDEFINED in Hyp mode
7359 * - UNPREDICTABLE in User or System mode
7360 * - UNPREDICTABLE if the specified mode is:
7361 * -- not implemented
7362 * -- not a valid mode number
7363 * -- a mode that's at a higher exception level
7364 * -- Monitor, if we are Non-secure
7365 * For the UNPREDICTABLE cases we choose to UNDEF.
7367 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7368 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7369 return;
7372 if (s->current_el == 0 || s->current_el == 2) {
7373 undef = true;
7376 switch (mode) {
7377 case ARM_CPU_MODE_USR:
7378 case ARM_CPU_MODE_FIQ:
7379 case ARM_CPU_MODE_IRQ:
7380 case ARM_CPU_MODE_SVC:
7381 case ARM_CPU_MODE_ABT:
7382 case ARM_CPU_MODE_UND:
7383 case ARM_CPU_MODE_SYS:
7384 break;
7385 case ARM_CPU_MODE_HYP:
7386 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7387 undef = true;
7389 break;
7390 case ARM_CPU_MODE_MON:
7391 /* No need to check specifically for "are we non-secure" because
7392 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7393 * so if this isn't EL3 then we must be non-secure.
7395 if (s->current_el != 3) {
7396 undef = true;
7398 break;
7399 default:
7400 undef = true;
7403 if (undef) {
7404 unallocated_encoding(s);
7405 return;
7408 addr = tcg_temp_new_i32();
7409 tmp = tcg_const_i32(mode);
7410 /* get_r13_banked() will raise an exception if called from System mode */
7411 gen_set_condexec(s);
7412 gen_set_pc_im(s, s->pc_curr);
7413 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7414 tcg_temp_free_i32(tmp);
7415 switch (amode) {
7416 case 0: /* DA */
7417 offset = -4;
7418 break;
7419 case 1: /* IA */
7420 offset = 0;
7421 break;
7422 case 2: /* DB */
7423 offset = -8;
7424 break;
7425 case 3: /* IB */
7426 offset = 4;
7427 break;
7428 default:
7429 abort();
7431 tcg_gen_addi_i32(addr, addr, offset);
7432 tmp = load_reg(s, 14);
7433 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7434 tcg_temp_free_i32(tmp);
7435 tmp = load_cpu_field(spsr);
7436 tcg_gen_addi_i32(addr, addr, 4);
7437 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7438 tcg_temp_free_i32(tmp);
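/*
 * For writeback, adjust addr (which currently points at the second word
 * just stored) to the SP value written back by this addressing mode:
 * DA and DB end at SP - 8, IA and IB at SP + 8.
 */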
7439 if (writeback) {
7440 switch (amode) {
7441 case 0:
7442 offset = -8;
7443 break;
7444 case 1:
7445 offset = 4;
7446 break;
7447 case 2:
7448 offset = -4;
7449 break;
7450 case 3:
7451 offset = 0;
7452 break;
7453 default:
7454 abort();
7456 tcg_gen_addi_i32(addr, addr, offset);
7457 tmp = tcg_const_i32(mode);
7458 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7459 tcg_temp_free_i32(tmp);
7461 tcg_temp_free_i32(addr);
7462 s->base.is_jmp = DISAS_UPDATE;
7465 /* Generate a label used for skipping this instruction */
7466 static void arm_gen_condlabel(DisasContext *s)
7468 if (!s->condjmp) {
7469 s->condlabel = gen_new_label();
7470 s->condjmp = 1;
7474 /* Skip this instruction if the ARM condition is false */
7475 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7477 arm_gen_condlabel(s);
7478 arm_gen_test_cc(cond ^ 1, s->condlabel);
7483 * Constant expanders for the decoders.
7486 static int negate(DisasContext *s, int x)
7488 return -x;
7491 static int plus_2(DisasContext *s, int x)
7493 return x + 2;
7496 static int times_2(DisasContext *s, int x)
7498 return x * 2;
7501 static int times_4(DisasContext *s, int x)
7503 return x * 4;
7506 /* Return only the rotation part of T32ExpandImm. */
7507 static int t32_expandimm_rot(DisasContext *s, int x)
7509 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7512 /* Return the unrotated immediate from T32ExpandImm. */
7513 static int t32_expandimm_imm(DisasContext *s, int x)
7515 int imm = extract32(x, 0, 8);
7517 switch (extract32(x, 8, 4)) {
7518 case 0: /* XY */
7519 /* Nothing to do. */
7520 break;
7521 case 1: /* 00XY00XY */
7522 imm *= 0x00010001;
7523 break;
7524 case 2: /* XY00XY00 */
7525 imm *= 0x01000100;
7526 break;
7527 case 3: /* XYXYXYXY */
7528 imm *= 0x01010101;
7529 break;
7530 default:
7531 /* Rotated constant. */
7532 imm |= 0x80;
7533 break;
7535 return imm;
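/*
 * Worked examples of T32ExpandImm as split across these two expanders:
 *   imm12 = 0x1CD: pattern "00XY00XY" -> imm = 0x00CD00CD, rot = 0.
 *   imm12 = 0x42B: rotated constant   -> imm = 0x2B | 0x80 = 0xAB,
 *                  rot = bits [11:7] = 8, final value ror32(0xAB, 8)
 *                  = 0xAB000000.
 * The rotation itself is applied by the users of these expanders, via
 * ror32(a->imm, a->rot).
 */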
7538 static int t32_branch24(DisasContext *s, int x)
7540 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
7541 x ^= !(x < 0) * (3 << 21);
7542 /* Append the final zero. */
7543 return x << 1;
7546 static int t16_setflags(DisasContext *s)
7548 return s->condexec_mask == 0;
7551 static int t16_push_list(DisasContext *s, int x)
7553 return (x & 0xff) | (x & 0x100) << (14 - 8);
7556 static int t16_pop_list(DisasContext *s, int x)
7558 return (x & 0xff) | (x & 0x100) << (15 - 8);
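/*
 * Bit 8 of the T16 register list is the extra register: LR (r14) for
 * PUSH and PC (r15) for POP, hence the different shift amounts above.
 */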
7562 * Include the generated decoders.
7565 #include "decode-a32.inc.c"
7566 #include "decode-a32-uncond.inc.c"
7567 #include "decode-t32.inc.c"
7568 #include "decode-t16.inc.c"
7570 /* Helpers to swap operands for reverse-subtract. */
7571 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7573 tcg_gen_sub_i32(dst, b, a);
7576 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7578 gen_sub_CC(dst, b, a);
7581 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7583 gen_sub_carry(dest, b, a);
7586 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7588 gen_sbc_CC(dest, b, a);
7592 * Helpers for the data processing routines.
7594 * After the computation store the results back.
7595 * This may be suppressed altogether (STREG_NONE), require a runtime
7596 * check against the stack limits (STREG_SP_CHECK), or generate an
7597 * exception return. Oh, or store into a register.
7599 * Always return true, indicating success for a trans_* function.
7601 typedef enum {
7602 STREG_NONE,
7603 STREG_NORMAL,
7604 STREG_SP_CHECK,
7605 STREG_EXC_RET,
7606 } StoreRegKind;
7608 static bool store_reg_kind(DisasContext *s, int rd,
7609 TCGv_i32 val, StoreRegKind kind)
7611 switch (kind) {
7612 case STREG_NONE:
7613 tcg_temp_free_i32(val);
7614 return true;
7615 case STREG_NORMAL:
7616 /* See ALUWritePC: Interworking only from a32 mode. */
7617 if (s->thumb) {
7618 store_reg(s, rd, val);
7619 } else {
7620 store_reg_bx(s, rd, val);
7622 return true;
7623 case STREG_SP_CHECK:
7624 store_sp_checked(s, val);
7625 return true;
7626 case STREG_EXC_RET:
7627 gen_exception_return(s, val);
7628 return true;
7630 g_assert_not_reached();
7634 * Data Processing (register)
7636 * Operate, with set flags, one register source,
7637 * one immediate shifted register source, and a destination.
7639 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7640 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7641 int logic_cc, StoreRegKind kind)
7643 TCGv_i32 tmp1, tmp2;
7645 tmp2 = load_reg(s, a->rm);
7646 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7647 tmp1 = load_reg(s, a->rn);
7649 gen(tmp1, tmp1, tmp2);
7650 tcg_temp_free_i32(tmp2);
7652 if (logic_cc) {
7653 gen_logic_CC(tmp1);
7655 return store_reg_kind(s, a->rd, tmp1, kind);
7658 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7659 void (*gen)(TCGv_i32, TCGv_i32),
7660 int logic_cc, StoreRegKind kind)
7662 TCGv_i32 tmp;
7664 tmp = load_reg(s, a->rm);
7665 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7667 gen(tmp, tmp);
7668 if (logic_cc) {
7669 gen_logic_CC(tmp);
7671 return store_reg_kind(s, a->rd, tmp, kind);
7675 * Data-processing (register-shifted register)
7677 * Operate, with set flags, one register source,
7678 * one register shifted register source, and a destination.
7680 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7681 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7682 int logic_cc, StoreRegKind kind)
7684 TCGv_i32 tmp1, tmp2;
7686 tmp1 = load_reg(s, a->rs);
7687 tmp2 = load_reg(s, a->rm);
7688 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7689 tmp1 = load_reg(s, a->rn);
7691 gen(tmp1, tmp1, tmp2);
7692 tcg_temp_free_i32(tmp2);
7694 if (logic_cc) {
7695 gen_logic_CC(tmp1);
7697 return store_reg_kind(s, a->rd, tmp1, kind);
7700 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7701 void (*gen)(TCGv_i32, TCGv_i32),
7702 int logic_cc, StoreRegKind kind)
7704 TCGv_i32 tmp1, tmp2;
7706 tmp1 = load_reg(s, a->rs);
7707 tmp2 = load_reg(s, a->rm);
7708 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7710 gen(tmp2, tmp2);
7711 if (logic_cc) {
7712 gen_logic_CC(tmp2);
7714 return store_reg_kind(s, a->rd, tmp2, kind);
7718 * Data-processing (immediate)
7720 * Operate, with set flags, one register source,
7721 * one rotated immediate, and a destination.
7723 * Note that logic_cc && a->rot setting CF based on the msb of the
7724 * immediate is the reason why we must pass in the unrotated form
7725 * of the immediate.
7727 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7728 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7729 int logic_cc, StoreRegKind kind)
7731 TCGv_i32 tmp1, tmp2;
7732 uint32_t imm;
7734 imm = ror32(a->imm, a->rot);
7735 if (logic_cc && a->rot) {
7736 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7738 tmp2 = tcg_const_i32(imm);
7739 tmp1 = load_reg(s, a->rn);
7741 gen(tmp1, tmp1, tmp2);
7742 tcg_temp_free_i32(tmp2);
7744 if (logic_cc) {
7745 gen_logic_CC(tmp1);
7747 return store_reg_kind(s, a->rd, tmp1, kind);
7750 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7751 void (*gen)(TCGv_i32, TCGv_i32),
7752 int logic_cc, StoreRegKind kind)
7754 TCGv_i32 tmp;
7755 uint32_t imm;
7757 imm = ror32(a->imm, a->rot);
7758 if (logic_cc && a->rot) {
7759 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7761 tmp = tcg_const_i32(imm);
7763 gen(tmp, tmp);
7764 if (logic_cc) {
7765 gen_logic_CC(tmp);
7767 return store_reg_kind(s, a->rd, tmp, kind);
7770 #define DO_ANY3(NAME, OP, L, K) \
7771 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7772 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7773 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7774 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7775 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7776 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7778 #define DO_ANY2(NAME, OP, L, K) \
7779 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7780 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7781 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7782 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7783 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7784 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7786 #define DO_CMP2(NAME, OP, L) \
7787 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7788 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7789 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7790 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7791 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7792 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
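/*
 * For example, DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) expands
 * into trans_AND_rrri(), trans_AND_rrrr() and trans_AND_rri(), one per
 * operand form, each forwarding to the matching op_s_* helper above.
 * The L and K arguments are expanded inside those trans_ functions, so
 * they may refer to 's' and 'a'.
 */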
7794 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7795 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7796 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7797 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7799 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7800 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7801 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7802 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7804 DO_CMP2(TST, tcg_gen_and_i32, true)
7805 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7806 DO_CMP2(CMN, gen_add_CC, false)
7807 DO_CMP2(CMP, gen_sub_CC, false)
7809 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7810 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7813 * Note for the computation of StoreRegKind we return out of the
7814 * middle of the functions that are expanded by DO_ANY3, and that
7815 * we modify a->s via that parameter before it is used by OP.
7817 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7819 StoreRegKind ret = STREG_NORMAL;
7820 if (a->rd == 15 && a->s) {
7822 * See ALUExceptionReturn:
7823 * In User mode, UNPREDICTABLE; we choose UNDEF.
7824 * In Hyp mode, UNDEFINED.
7826 if (IS_USER(s) || s->current_el == 2) {
7827 unallocated_encoding(s);
7828 return true;
7830 /* There is no writeback of nzcv to PSTATE. */
7831 a->s = 0;
7832 ret = STREG_EXC_RET;
7833 } else if (a->rd == 13 && a->rn == 13) {
7834 ret = STREG_SP_CHECK;
7836 ret;
7839 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7841 StoreRegKind ret = STREG_NORMAL;
7842 if (a->rd == 15 && a->s) {
7844 * See ALUExceptionReturn:
7845 * In User mode, UNPREDICTABLE; we choose UNDEF.
7846 * In Hyp mode, UNDEFINED.
7848 if (IS_USER(s) || s->current_el == 2) {
7849 unallocated_encoding(s);
7850 return true;
7852 /* There is no writeback of nzcv to PSTATE. */
7853 a->s = 0;
7854 ret = STREG_EXC_RET;
7855 } else if (a->rd == 13) {
7856 ret = STREG_SP_CHECK;
7858 ret;
7861 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7864 * ORN is only available with T32, so there is no register-shifted-register
7865 * form of the insn. Using the DO_ANY3 macro would create an unused function.
7867 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7869 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7872 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7874 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7877 #undef DO_ANY3
7878 #undef DO_ANY2
7879 #undef DO_CMP2
7881 static bool trans_ADR(DisasContext *s, arg_ri *a)
7883 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7884 return true;
7887 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7889 TCGv_i32 tmp;
7891 if (!ENABLE_ARCH_6T2) {
7892 return false;
7895 tmp = tcg_const_i32(a->imm);
7896 store_reg(s, a->rd, tmp);
7897 return true;
7900 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7902 TCGv_i32 tmp;
7904 if (!ENABLE_ARCH_6T2) {
7905 return false;
7908 tmp = load_reg(s, a->rd);
7909 tcg_gen_ext16u_i32(tmp, tmp);
7910 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7911 store_reg(s, a->rd, tmp);
7912 return true;
7916 * Multiply and multiply accumulate
7919 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7921 TCGv_i32 t1, t2;
7923 t1 = load_reg(s, a->rn);
7924 t2 = load_reg(s, a->rm);
7925 tcg_gen_mul_i32(t1, t1, t2);
7926 tcg_temp_free_i32(t2);
7927 if (add) {
7928 t2 = load_reg(s, a->ra);
7929 tcg_gen_add_i32(t1, t1, t2);
7930 tcg_temp_free_i32(t2);
7932 if (a->s) {
7933 gen_logic_CC(t1);
7935 store_reg(s, a->rd, t1);
7936 return true;
7939 static bool trans_MUL(DisasContext *s, arg_MUL *a)
7941 return op_mla(s, a, false);
7944 static bool trans_MLA(DisasContext *s, arg_MLA *a)
7946 return op_mla(s, a, true);
7949 static bool trans_MLS(DisasContext *s, arg_MLS *a)
7951 TCGv_i32 t1, t2;
7953 if (!ENABLE_ARCH_6T2) {
7954 return false;
7956 t1 = load_reg(s, a->rn);
7957 t2 = load_reg(s, a->rm);
7958 tcg_gen_mul_i32(t1, t1, t2);
7959 tcg_temp_free_i32(t2);
7960 t2 = load_reg(s, a->ra);
7961 tcg_gen_sub_i32(t1, t2, t1);
7962 tcg_temp_free_i32(t2);
7963 store_reg(s, a->rd, t1);
7964 return true;
7967 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7969 TCGv_i32 t0, t1, t2, t3;
7971 t0 = load_reg(s, a->rm);
7972 t1 = load_reg(s, a->rn);
7973 if (uns) {
7974 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7975 } else {
7976 tcg_gen_muls2_i32(t0, t1, t0, t1);
7978 if (add) {
7979 t2 = load_reg(s, a->ra);
7980 t3 = load_reg(s, a->rd);
7981 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7982 tcg_temp_free_i32(t2);
7983 tcg_temp_free_i32(t3);
7985 if (a->s) {
7986 gen_logicq_cc(t0, t1);
7988 store_reg(s, a->ra, t0);
7989 store_reg(s, a->rd, t1);
7990 return true;
7993 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7995 return op_mlal(s, a, true, false);
7998 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
8000 return op_mlal(s, a, false, false);
8003 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
8005 return op_mlal(s, a, true, true);
8008 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
8010 return op_mlal(s, a, false, true);
8013 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
8015 TCGv_i32 t0, t1, t2, zero;
8017 if (s->thumb
8018 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8019 : !ENABLE_ARCH_6) {
8020 return false;
8023 t0 = load_reg(s, a->rm);
8024 t1 = load_reg(s, a->rn);
8025 tcg_gen_mulu2_i32(t0, t1, t0, t1);
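/*
 * Adding both 32-bit accumulators to the 64-bit product cannot overflow:
 * (2^32 - 1) * (2^32 - 1) + 2 * (2^32 - 1) == 2^64 - 1.
 */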
8026 zero = tcg_const_i32(0);
8027 t2 = load_reg(s, a->ra);
8028 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8029 tcg_temp_free_i32(t2);
8030 t2 = load_reg(s, a->rd);
8031 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8032 tcg_temp_free_i32(t2);
8033 tcg_temp_free_i32(zero);
8034 store_reg(s, a->ra, t0);
8035 store_reg(s, a->rd, t1);
8036 return true;
8040 * Saturating addition and subtraction
8043 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
8045 TCGv_i32 t0, t1;
8047 if (s->thumb
8048 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8049 : !ENABLE_ARCH_5TE) {
8050 return false;
8053 t0 = load_reg(s, a->rm);
8054 t1 = load_reg(s, a->rn);
8055 if (doub) {
8056 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8058 if (add) {
8059 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8060 } else {
8061 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8063 tcg_temp_free_i32(t1);
8064 store_reg(s, a->rd, t0);
8065 return true;
8068 #define DO_QADDSUB(NAME, ADD, DOUB) \
8069 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8071 return op_qaddsub(s, a, ADD, DOUB); \
8074 DO_QADDSUB(QADD, true, false)
8075 DO_QADDSUB(QSUB, false, false)
8076 DO_QADDSUB(QDADD, true, true)
8077 DO_QADDSUB(QDSUB, false, true)
8079 #undef DO_QADDSUB
8082 * Halfword multiply and multiply accumulate
8085 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8086 int add_long, bool nt, bool mt)
8088 TCGv_i32 t0, t1, tl, th;
8090 if (s->thumb
8091 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8092 : !ENABLE_ARCH_5TE) {
8093 return false;
8096 t0 = load_reg(s, a->rn);
8097 t1 = load_reg(s, a->rm);
8098 gen_mulxy(t0, t1, nt, mt);
8099 tcg_temp_free_i32(t1);
8101 switch (add_long) {
8102 case 0:
8103 store_reg(s, a->rd, t0);
8104 break;
8105 case 1:
8106 t1 = load_reg(s, a->ra);
8107 gen_helper_add_setq(t0, cpu_env, t0, t1);
8108 tcg_temp_free_i32(t1);
8109 store_reg(s, a->rd, t0);
8110 break;
8111 case 2:
8112 tl = load_reg(s, a->ra);
8113 th = load_reg(s, a->rd);
8114 /* Sign-extend the 32-bit product to 64 bits. */
8115 t1 = tcg_temp_new_i32();
8116 tcg_gen_sari_i32(t1, t0, 31);
8117 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8118 tcg_temp_free_i32(t0);
8119 tcg_temp_free_i32(t1);
8120 store_reg(s, a->ra, tl);
8121 store_reg(s, a->rd, th);
8122 break;
8123 default:
8124 g_assert_not_reached();
8126 return true;
8129 #define DO_SMLAX(NAME, add, nt, mt) \
8130 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8132 return op_smlaxxx(s, a, add, nt, mt); \
8135 DO_SMLAX(SMULBB, 0, 0, 0)
8136 DO_SMLAX(SMULBT, 0, 0, 1)
8137 DO_SMLAX(SMULTB, 0, 1, 0)
8138 DO_SMLAX(SMULTT, 0, 1, 1)
8140 DO_SMLAX(SMLABB, 1, 0, 0)
8141 DO_SMLAX(SMLABT, 1, 0, 1)
8142 DO_SMLAX(SMLATB, 1, 1, 0)
8143 DO_SMLAX(SMLATT, 1, 1, 1)
8145 DO_SMLAX(SMLALBB, 2, 0, 0)
8146 DO_SMLAX(SMLALBT, 2, 0, 1)
8147 DO_SMLAX(SMLALTB, 2, 1, 0)
8148 DO_SMLAX(SMLALTT, 2, 1, 1)
8150 #undef DO_SMLAX
8152 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8154 TCGv_i32 t0, t1;
8156 if (!ENABLE_ARCH_5TE) {
8157 return false;
8160 t0 = load_reg(s, a->rn);
8161 t1 = load_reg(s, a->rm);
8163 * Since the nominal result is product<47:16>, shift the 16-bit
8164 * input up by 16 bits, so that the result is at product<63:32>.
8166 if (mt) {
8167 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8168 } else {
8169 tcg_gen_shli_i32(t1, t1, 16);
8171 tcg_gen_muls2_i32(t0, t1, t0, t1);
8172 tcg_temp_free_i32(t0);
8173 if (add) {
8174 t0 = load_reg(s, a->ra);
8175 gen_helper_add_setq(t1, cpu_env, t1, t0);
8176 tcg_temp_free_i32(t0);
8178 store_reg(s, a->rd, t1);
8179 return true;
8182 #define DO_SMLAWX(NAME, add, mt) \
8183 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8185 return op_smlawx(s, a, add, mt); \
8188 DO_SMLAWX(SMULWB, 0, 0)
8189 DO_SMLAWX(SMULWT, 0, 1)
8190 DO_SMLAWX(SMLAWB, 1, 0)
8191 DO_SMLAWX(SMLAWT, 1, 1)
8193 #undef DO_SMLAWX
8196 * MSR (immediate) and hints
8199 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8202 * When running single-threaded TCG code, use the helper to ensure that
8203 * the next round-robin scheduled vCPU gets a crack. When running in
8204 * MTTCG we don't generate jumps to the helper as it won't affect the
8205 * scheduling of other vCPUs.
8207 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8208 gen_set_pc_im(s, s->base.pc_next);
8209 s->base.is_jmp = DISAS_YIELD;
8211 return true;
8214 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8217 * When running single-threaded TCG code, use the helper to ensure that
8218 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8219 * just skip this instruction. Currently the SEV/SEVL instructions,
8220 * which are *one* of many ways to wake the CPU from WFE, are not
8221 * implemented so we can't sleep like WFI does.
8223 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8224 gen_set_pc_im(s, s->base.pc_next);
8225 s->base.is_jmp = DISAS_WFE;
8227 return true;
8230 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8232 /* For WFI, halt the vCPU until an IRQ. */
8233 gen_set_pc_im(s, s->base.pc_next);
8234 s->base.is_jmp = DISAS_WFI;
8235 return true;
8238 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8240 return true;
8243 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8245 uint32_t val = ror32(a->imm, a->rot * 2);
8246 uint32_t mask = msr_mask(s, a->mask, a->r);
8248 if (gen_set_psr_im(s, mask, a->r, val)) {
8249 unallocated_encoding(s);
8251 return true;
8255 * Cyclic Redundancy Check
8258 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8260 TCGv_i32 t1, t2, t3;
8262 if (!dc_isar_feature(aa32_crc32, s)) {
8263 return false;
8266 t1 = load_reg(s, a->rn);
8267 t2 = load_reg(s, a->rm);
8268 switch (sz) {
8269 case MO_8:
8270 gen_uxtb(t2);
8271 break;
8272 case MO_16:
8273 gen_uxth(t2);
8274 break;
8275 case MO_32:
8276 break;
8277 default:
8278 g_assert_not_reached();
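/* 1 << sz is the operand width in bytes (1, 2 or 4) passed to the CRC helper. */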
8280 t3 = tcg_const_i32(1 << sz);
8281 if (c) {
8282 gen_helper_crc32c(t1, t1, t2, t3);
8283 } else {
8284 gen_helper_crc32(t1, t1, t2, t3);
8286 tcg_temp_free_i32(t2);
8287 tcg_temp_free_i32(t3);
8288 store_reg(s, a->rd, t1);
8289 return true;
8292 #define DO_CRC32(NAME, c, sz) \
8293 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8294 { return op_crc32(s, a, c, sz); }
8296 DO_CRC32(CRC32B, false, MO_8)
8297 DO_CRC32(CRC32H, false, MO_16)
8298 DO_CRC32(CRC32W, false, MO_32)
8299 DO_CRC32(CRC32CB, true, MO_8)
8300 DO_CRC32(CRC32CH, true, MO_16)
8301 DO_CRC32(CRC32CW, true, MO_32)
8303 #undef DO_CRC32
8306 * Miscellaneous instructions
8309 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8311 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8312 return false;
8314 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8315 return true;
8318 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8320 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8321 return false;
8323 gen_msr_banked(s, a->r, a->sysm, a->rn);
8324 return true;
8327 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8329 TCGv_i32 tmp;
8331 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8332 return false;
8334 if (a->r) {
8335 if (IS_USER(s)) {
8336 unallocated_encoding(s);
8337 return true;
8339 tmp = load_cpu_field(spsr);
8340 } else {
8341 tmp = tcg_temp_new_i32();
8342 gen_helper_cpsr_read(tmp, cpu_env);
8344 store_reg(s, a->rd, tmp);
8345 return true;
8348 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8350 TCGv_i32 tmp;
8351 uint32_t mask = msr_mask(s, a->mask, a->r);
8353 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8354 return false;
8356 tmp = load_reg(s, a->rn);
8357 if (gen_set_psr(s, mask, a->r, tmp)) {
8358 unallocated_encoding(s);
8360 return true;
8363 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8365 TCGv_i32 tmp;
8367 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8368 return false;
8370 tmp = tcg_const_i32(a->sysm);
8371 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8372 store_reg(s, a->rd, tmp);
8373 return true;
8376 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8378 TCGv_i32 addr, reg;
8380 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8381 return false;
8383 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8384 reg = load_reg(s, a->rn);
8385 gen_helper_v7m_msr(cpu_env, addr, reg);
8386 tcg_temp_free_i32(addr);
8387 tcg_temp_free_i32(reg);
8388 /* If we wrote to CONTROL, the EL might have changed */
8389 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8390 gen_lookup_tb(s);
8391 return true;
8394 static bool trans_BX(DisasContext *s, arg_BX *a)
8396 if (!ENABLE_ARCH_4T) {
8397 return false;
8399 gen_bx_excret(s, load_reg(s, a->rm));
8400 return true;
8403 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8405 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8406 return false;
8408 /* Trivial implementation equivalent to bx. */
8409 gen_bx(s, load_reg(s, a->rm));
8410 return true;
8413 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8415 TCGv_i32 tmp;
8417 if (!ENABLE_ARCH_5) {
8418 return false;
8420 tmp = load_reg(s, a->rm);
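/*
 * The return address in LR has the current instruction-set state in
 * bit 0, so a later BX LR returns to the correct state.
 */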
8421 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8422 gen_bx(s, tmp);
8423 return true;
8427 * BXNS/BLXNS: only exist for v8M with the security extensions,
8428 * and always UNDEF if NonSecure. We don't implement these in
8429 * the user-only mode either (in theory you can use them from
8430 * Secure User mode but they are too tied in to system emulation).
8432 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8434 if (!s->v8m_secure || IS_USER_ONLY) {
8435 unallocated_encoding(s);
8436 } else {
8437 gen_bxns(s, a->rm);
8439 return true;
8442 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8444 if (!s->v8m_secure || IS_USER_ONLY) {
8445 unallocated_encoding(s);
8446 } else {
8447 gen_blxns(s, a->rm);
8449 return true;
8452 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8454 TCGv_i32 tmp;
8456 if (!ENABLE_ARCH_5) {
8457 return false;
8459 tmp = load_reg(s, a->rm);
8460 tcg_gen_clzi_i32(tmp, tmp, 32);
8461 store_reg(s, a->rd, tmp);
8462 return true;
8465 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8467 TCGv_i32 tmp;
8469 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8470 return false;
8472 if (IS_USER(s)) {
8473 unallocated_encoding(s);
8474 return true;
8476 if (s->current_el == 2) {
8477 /* ERET from Hyp uses ELR_Hyp, not LR */
8478 tmp = load_cpu_field(elr_el[2]);
8479 } else {
8480 tmp = load_reg(s, 14);
8482 gen_exception_return(s, tmp);
8483 return true;
8486 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8488 gen_hlt(s, a->imm);
8489 return true;
8492 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8494 if (!ENABLE_ARCH_5) {
8495 return false;
8497 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8498 semihosting_enabled() &&
8499 #ifndef CONFIG_USER_ONLY
8500 !IS_USER(s) &&
8501 #endif
8502 (a->imm == 0xab)) {
8503 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8504 } else {
8505 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8507 return true;
8510 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8512 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8513 return false;
8515 if (IS_USER(s)) {
8516 unallocated_encoding(s);
8517 } else {
8518 gen_hvc(s, a->imm);
8520 return true;
8523 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8525 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8526 return false;
8528 if (IS_USER(s)) {
8529 unallocated_encoding(s);
8530 } else {
8531 gen_smc(s);
8533 return true;
8536 static bool trans_SG(DisasContext *s, arg_SG *a)
8538 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8539 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8540 return false;
8543 * SG (v8M only)
8544 * The bulk of the behaviour for this instruction is implemented
8545 * in v7m_handle_execute_nsc(), which deals with the insn when
8546 * it is executed by a CPU in non-secure state from memory
8547 * which is Secure & NonSecure-Callable.
8548 * Here we only need to handle the remaining cases:
8549 * * in NS memory (including the "security extension not
8550 * implemented" case) : NOP
8551 * * in S memory but CPU already secure (clear IT bits)
8552 * We know that the attribute for the memory this insn is
8553 * in must match the current CPU state, because otherwise
8554 * get_phys_addr_pmsav8 would have generated an exception.
8556 if (s->v8m_secure) {
8557 /* Like the IT insn, we don't need to generate any code */
8558 s->condexec_cond = 0;
8559 s->condexec_mask = 0;
8561 return true;
8564 static bool trans_TT(DisasContext *s, arg_TT *a)
8566 TCGv_i32 addr, tmp;
8568 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8569 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8570 return false;
8572 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8573 /* We UNDEF for these UNPREDICTABLE cases */
8574 unallocated_encoding(s);
8575 return true;
8577 if (a->A && !s->v8m_secure) {
8578 /* This case is UNDEFINED. */
8579 unallocated_encoding(s);
8580 return true;
8583 addr = load_reg(s, a->rn);
8584 tmp = tcg_const_i32((a->A << 1) | a->T);
8585 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8586 tcg_temp_free_i32(addr);
8587 store_reg(s, a->rd, tmp);
8588 return true;
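/*
 * The (a->A << 1) | a->T value passed to the helper selects the TT
 * variant: T requests the unprivileged query (TTT/TTAT) and A requests
 * the query from the alternate (Non-secure) state (TTA/TTAT), which is
 * why A is rejected above when the CPU is not Secure.
 */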
8592 * Load/store register index
8595 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8597 ISSInfo ret;
8599 /* ISS not valid if writeback */
8600 if (p && !w) {
8601 ret = rd;
8602 if (s->base.pc_next - s->pc_curr == 2) {
8603 ret |= ISSIs16Bit;
8605 } else {
8606 ret = ISSInvalid;
8608 return ret;
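/*
 * The ISSInfo built here is what disas_set_da_iss() uses to decide
 * whether a data abort on the access can report a full syndrome (so
 * that, for instance, a hypervisor can emulate the access): it is only
 * valid for loads/stores without writeback, and ISSIs16Bit marks the
 * 16-bit Thumb encodings, detected from the insn length above.
 */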
8611 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8613 TCGv_i32 addr = load_reg(s, a->rn);
8615 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8616 gen_helper_v8m_stackcheck(cpu_env, addr);
8619 if (a->p) {
8620 TCGv_i32 ofs = load_reg(s, a->rm);
8621 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8622 if (a->u) {
8623 tcg_gen_add_i32(addr, addr, ofs);
8624 } else {
8625 tcg_gen_sub_i32(addr, addr, ofs);
8627 tcg_temp_free_i32(ofs);
8629 return addr;
8632 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8633 TCGv_i32 addr, int address_offset)
8635 if (!a->p) {
8636 TCGv_i32 ofs = load_reg(s, a->rm);
8637 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8638 if (a->u) {
8639 tcg_gen_add_i32(addr, addr, ofs);
8640 } else {
8641 tcg_gen_sub_i32(addr, addr, ofs);
8643 tcg_temp_free_i32(ofs);
8644 } else if (!a->w) {
8645 tcg_temp_free_i32(addr);
8646 return;
8648 tcg_gen_addi_i32(addr, addr, address_offset);
8649 store_reg(s, a->rn, addr);
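/*
 * op_addr_rr_pre/op_addr_rr_post split the register-offset addressing
 * modes: the pre step applies the shifted register offset when P is set
 * (offset or pre-indexed forms) and does the v8M SP-limit check, while
 * the post step applies the offset for post-indexed forms and writes the
 * updated address back to Rn when required, or frees it otherwise.
 */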
8652 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8653 MemOp mop, int mem_idx)
8655 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8656 TCGv_i32 addr, tmp;
8658 addr = op_addr_rr_pre(s, a);
8660 tmp = tcg_temp_new_i32();
8661 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8662 disas_set_da_iss(s, mop, issinfo);
8665 * Perform base writeback before the loaded value to
8666 * ensure correct behavior with overlapping index registers.
8668 op_addr_rr_post(s, a, addr, 0);
8669 store_reg_from_load(s, a->rt, tmp);
8670 return true;
8673 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8674 MemOp mop, int mem_idx)
8676 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8677 TCGv_i32 addr, tmp;
8679 addr = op_addr_rr_pre(s, a);
8681 tmp = load_reg(s, a->rt);
8682 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8683 disas_set_da_iss(s, mop, issinfo);
8684 tcg_temp_free_i32(tmp);
8686 op_addr_rr_post(s, a, addr, 0);
8687 return true;
8690 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8692 int mem_idx = get_mem_index(s);
8693 TCGv_i32 addr, tmp;
8695 if (!ENABLE_ARCH_5TE) {
8696 return false;
8698 if (a->rt & 1) {
8699 unallocated_encoding(s);
8700 return true;
8702 addr = op_addr_rr_pre(s, a);
8704 tmp = tcg_temp_new_i32();
8705 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8706 store_reg(s, a->rt, tmp);
8708 tcg_gen_addi_i32(addr, addr, 4);
8710 tmp = tcg_temp_new_i32();
8711 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8712 store_reg(s, a->rt + 1, tmp);
8714 /* LDRD w/ base writeback is undefined if the registers overlap. */
8715 op_addr_rr_post(s, a, addr, -4);
8716 return true;
8719 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8721 int mem_idx = get_mem_index(s);
8722 TCGv_i32 addr, tmp;
8724 if (!ENABLE_ARCH_5TE) {
8725 return false;
8727 if (a->rt & 1) {
8728 unallocated_encoding(s);
8729 return true;
8731 addr = op_addr_rr_pre(s, a);
8733 tmp = load_reg(s, a->rt);
8734 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8735 tcg_temp_free_i32(tmp);
8737 tcg_gen_addi_i32(addr, addr, 4);
8739 tmp = load_reg(s, a->rt + 1);
8740 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8741 tcg_temp_free_i32(tmp);
8743 op_addr_rr_post(s, a, addr, -4);
8744 return true;
8748 * Load/store immediate index
8751 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8753 int ofs = a->imm;
8755 if (!a->u) {
8756 ofs = -ofs;
8759 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8761 * Stackcheck. Here we know 'addr' is the current SP;
8762 * U is set if we're moving SP up, else down. It is
8763 * UNKNOWN whether the limit check triggers when SP starts
8764 * below the limit and ends up above it; we choose to make it trigger.
8766 if (!a->u) {
8767 TCGv_i32 newsp = tcg_temp_new_i32();
8768 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8769 gen_helper_v8m_stackcheck(cpu_env, newsp);
8770 tcg_temp_free_i32(newsp);
8771 } else {
8772 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8776 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8779 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8780 TCGv_i32 addr, int address_offset)
8782 if (!a->p) {
8783 if (a->u) {
8784 address_offset += a->imm;
8785 } else {
8786 address_offset -= a->imm;
8788 } else if (!a->w) {
8789 tcg_temp_free_i32(addr);
8790 return;
8792 tcg_gen_addi_i32(addr, addr, address_offset);
8793 store_reg(s, a->rn, addr);
8796 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8797 MemOp mop, int mem_idx)
8799 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8800 TCGv_i32 addr, tmp;
8802 addr = op_addr_ri_pre(s, a);
8804 tmp = tcg_temp_new_i32();
8805 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8806 disas_set_da_iss(s, mop, issinfo);
8809 * Perform base writeback before the loaded value to
8810 * ensure correct behavior with overlapping index registers.
8812 op_addr_ri_post(s, a, addr, 0);
8813 store_reg_from_load(s, a->rt, tmp);
8814 return true;
8817 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8818 MemOp mop, int mem_idx)
8820 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8821 TCGv_i32 addr, tmp;
8823 addr = op_addr_ri_pre(s, a);
8825 tmp = load_reg(s, a->rt);
8826 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8827 disas_set_da_iss(s, mop, issinfo);
8828 tcg_temp_free_i32(tmp);
8830 op_addr_ri_post(s, a, addr, 0);
8831 return true;
8834 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8836 int mem_idx = get_mem_index(s);
8837 TCGv_i32 addr, tmp;
8839 addr = op_addr_ri_pre(s, a);
8841 tmp = tcg_temp_new_i32();
8842 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8843 store_reg(s, a->rt, tmp);
8845 tcg_gen_addi_i32(addr, addr, 4);
8847 tmp = tcg_temp_new_i32();
8848 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8849 store_reg(s, rt2, tmp);
8851 /* LDRD w/ base writeback is undefined if the registers overlap. */
8852 op_addr_ri_post(s, a, addr, -4);
8853 return true;
8856 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8858 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8859 return false;
8861 return op_ldrd_ri(s, a, a->rt + 1);
8864 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8866 arg_ldst_ri b = {
8867 .u = a->u, .w = a->w, .p = a->p,
8868 .rn = a->rn, .rt = a->rt, .imm = a->imm
8870 return op_ldrd_ri(s, &b, a->rt2);
8873 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8875 int mem_idx = get_mem_index(s);
8876 TCGv_i32 addr, tmp;
8878 addr = op_addr_ri_pre(s, a);
8880 tmp = load_reg(s, a->rt);
8881 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8882 tcg_temp_free_i32(tmp);
8884 tcg_gen_addi_i32(addr, addr, 4);
8886 tmp = load_reg(s, rt2);
8887 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8888 tcg_temp_free_i32(tmp);
8890 op_addr_ri_post(s, a, addr, -4);
8891 return true;
8894 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8896 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8897 return false;
8899 return op_strd_ri(s, a, a->rt + 1);
8902 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8904 arg_ldst_ri b = {
8905 .u = a->u, .w = a->w, .p = a->p,
8906 .rn = a->rn, .rt = a->rt, .imm = a->imm
8908 return op_strd_ri(s, &b, a->rt2);
8911 #define DO_LDST(NAME, WHICH, MEMOP) \
8912 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8914 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
8916 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
8918 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
8920 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
8922 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
8924 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
8926 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
8929 DO_LDST(LDR, load, MO_UL)
8930 DO_LDST(LDRB, load, MO_UB)
8931 DO_LDST(LDRH, load, MO_UW)
8932 DO_LDST(LDRSB, load, MO_SB)
8933 DO_LDST(LDRSH, load, MO_SW)
8935 DO_LDST(STR, store, MO_UL)
8936 DO_LDST(STRB, store, MO_UB)
8937 DO_LDST(STRH, store, MO_UW)
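/*
 * Each DO_LDST invocation above generates four trans functions; e.g.
 * DO_LDST(LDR, load, MO_UL) produces trans_LDR_ri, trans_LDRT_ri,
 * trans_LDR_rr and trans_LDRT_rr, all forwarding to op_load_ri/_rr with
 * MO_UL.  The T ("unprivileged") variants differ only in using
 * get_a32_user_mem_index() instead of get_mem_index().
 */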
8939 #undef DO_LDST
8942 * Synchronization primitives
8945 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8947 TCGv_i32 addr, tmp;
8948 TCGv taddr;
8950 opc |= s->be_data;
8951 addr = load_reg(s, a->rn);
8952 taddr = gen_aa32_addr(s, addr, opc);
8953 tcg_temp_free_i32(addr);
8955 tmp = load_reg(s, a->rt2);
8956 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8957 tcg_temp_free(taddr);
8959 store_reg(s, a->rt, tmp);
8960 return true;
8963 static bool trans_SWP(DisasContext *s, arg_SWP *a)
8965 return op_swp(s, a, MO_UL | MO_ALIGN);
8968 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8970 return op_swp(s, a, MO_UB);
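/*
 * SWP/SWPB (the deprecated legacy swap instructions) are implemented
 * with tcg_gen_atomic_xchg_i32 above, so the read and write really are
 * a single atomic operation even when several CPUs are emulated in
 * parallel.
 */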
8974 * Load/Store Exclusive and Load-Acquire/Store-Release
8977 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8979 TCGv_i32 addr;
8980 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8981 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8983 /* We UNDEF for these UNPREDICTABLE cases. */
8984 if (a->rd == 15 || a->rn == 15 || a->rt == 15
8985 || a->rd == a->rn || a->rd == a->rt
8986 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
8987 || (mop == MO_64
8988 && (a->rt2 == 15
8989 || a->rd == a->rt2
8990 || (!v8a && s->thumb && a->rt2 == 13)))) {
8991 unallocated_encoding(s);
8992 return true;
8995 if (rel) {
8996 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8999 addr = tcg_temp_local_new_i32();
9000 load_reg_var(s, addr, a->rn);
9001 tcg_gen_addi_i32(addr, addr, a->imm);
9003 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
9004 tcg_temp_free_i32(addr);
9005 return true;
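/*
 * The 'rel' flag gives the STLEX* forms their store-release semantics by
 * emitting a barrier before the store-exclusive; the matching 'acq' flag
 * in op_ldrex below emits a barrier after the load-exclusive for the
 * LDAEX* forms.
 */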
9008 static bool trans_STREX(DisasContext *s, arg_STREX *a)
9010 if (!ENABLE_ARCH_6) {
9011 return false;
9013 return op_strex(s, a, MO_32, false);
9016 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
9018 if (!ENABLE_ARCH_6K) {
9019 return false;
9021 /* We UNDEF for these UNPREDICTABLE cases. */
9022 if (a->rt & 1) {
9023 unallocated_encoding(s);
9024 return true;
9026 a->rt2 = a->rt + 1;
9027 return op_strex(s, a, MO_64, false);
9030 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
9032 return op_strex(s, a, MO_64, false);
9035 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
9037 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9038 return false;
9040 return op_strex(s, a, MO_8, false);
9043 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
9045 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9046 return false;
9048 return op_strex(s, a, MO_16, false);
9051 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9053 if (!ENABLE_ARCH_8) {
9054 return false;
9056 return op_strex(s, a, MO_32, true);
9059 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9061 if (!ENABLE_ARCH_8) {
9062 return false;
9064 /* We UNDEF for these UNPREDICTABLE cases. */
9065 if (a->rt & 1) {
9066 unallocated_encoding(s);
9067 return true;
9069 a->rt2 = a->rt + 1;
9070 return op_strex(s, a, MO_64, true);
9073 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9075 if (!ENABLE_ARCH_8) {
9076 return false;
9078 return op_strex(s, a, MO_64, true);
9081 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9083 if (!ENABLE_ARCH_8) {
9084 return false;
9086 return op_strex(s, a, MO_8, true);
9089 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9091 if (!ENABLE_ARCH_8) {
9092 return false;
9094 return op_strex(s, a, MO_16, true);
9097 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9099 TCGv_i32 addr, tmp;
9101 if (!ENABLE_ARCH_8) {
9102 return false;
9104 /* We UNDEF for these UNPREDICTABLE cases. */
9105 if (a->rn == 15 || a->rt == 15) {
9106 unallocated_encoding(s);
9107 return true;
9110 addr = load_reg(s, a->rn);
9111 tmp = load_reg(s, a->rt);
9112 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9113 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9114 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9116 tcg_temp_free_i32(tmp);
9117 tcg_temp_free_i32(addr);
9118 return true;
9121 static bool trans_STL(DisasContext *s, arg_STL *a)
9123 return op_stl(s, a, MO_UL);
9126 static bool trans_STLB(DisasContext *s, arg_STL *a)
9128 return op_stl(s, a, MO_UB);
9131 static bool trans_STLH(DisasContext *s, arg_STL *a)
9133 return op_stl(s, a, MO_UW);
9136 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9138 TCGv_i32 addr;
9139 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9140 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9142 /* We UNDEF for these UNPREDICTABLE cases. */
9143 if (a->rn == 15 || a->rt == 15
9144 || (!v8a && s->thumb && a->rt == 13)
9145 || (mop == MO_64
9146 && (a->rt2 == 15 || a->rt == a->rt2
9147 || (!v8a && s->thumb && a->rt2 == 13)))) {
9148 unallocated_encoding(s);
9149 return true;
9152 addr = tcg_temp_local_new_i32();
9153 load_reg_var(s, addr, a->rn);
9154 tcg_gen_addi_i32(addr, addr, a->imm);
9156 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9157 tcg_temp_free_i32(addr);
9159 if (acq) {
9160 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9162 return true;
9165 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9167 if (!ENABLE_ARCH_6) {
9168 return false;
9170 return op_ldrex(s, a, MO_32, false);
9173 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9175 if (!ENABLE_ARCH_6K) {
9176 return false;
9178 /* We UNDEF for these UNPREDICTABLE cases. */
9179 if (a->rt & 1) {
9180 unallocated_encoding(s);
9181 return true;
9183 a->rt2 = a->rt + 1;
9184 return op_ldrex(s, a, MO_64, false);
9187 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9189 return op_ldrex(s, a, MO_64, false);
9192 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9194 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9195 return false;
9197 return op_ldrex(s, a, MO_8, false);
9200 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9202 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9203 return false;
9205 return op_ldrex(s, a, MO_16, false);
9208 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9210 if (!ENABLE_ARCH_8) {
9211 return false;
9213 return op_ldrex(s, a, MO_32, true);
9216 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9218 if (!ENABLE_ARCH_8) {
9219 return false;
9221 /* We UNDEF for these UNPREDICTABLE cases. */
9222 if (a->rt & 1) {
9223 unallocated_encoding(s);
9224 return true;
9226 a->rt2 = a->rt + 1;
9227 return op_ldrex(s, a, MO_64, true);
9230 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9232 if (!ENABLE_ARCH_8) {
9233 return false;
9235 return op_ldrex(s, a, MO_64, true);
9238 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9240 if (!ENABLE_ARCH_8) {
9241 return false;
9243 return op_ldrex(s, a, MO_8, true);
9246 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9248 if (!ENABLE_ARCH_8) {
9249 return false;
9251 return op_ldrex(s, a, MO_16, true);
9254 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9256 TCGv_i32 addr, tmp;
9258 if (!ENABLE_ARCH_8) {
9259 return false;
9261 /* We UNDEF for these UNPREDICTABLE cases. */
9262 if (a->rn == 15 || a->rt == 15) {
9263 unallocated_encoding(s);
9264 return true;
9267 addr = load_reg(s, a->rn);
9268 tmp = tcg_temp_new_i32();
9269 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9270 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9271 tcg_temp_free_i32(addr);
9273 store_reg(s, a->rt, tmp);
9274 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9275 return true;
9278 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9280 return op_lda(s, a, MO_UL);
9283 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9285 return op_lda(s, a, MO_UB);
9288 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9290 return op_lda(s, a, MO_UW);
9294 * Media instructions
9297 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9299 TCGv_i32 t1, t2;
9301 if (!ENABLE_ARCH_6) {
9302 return false;
9305 t1 = load_reg(s, a->rn);
9306 t2 = load_reg(s, a->rm);
9307 gen_helper_usad8(t1, t1, t2);
9308 tcg_temp_free_i32(t2);
9309 if (a->ra != 15) {
9310 t2 = load_reg(s, a->ra);
9311 tcg_gen_add_i32(t1, t1, t2);
9312 tcg_temp_free_i32(t2);
9314 store_reg(s, a->rd, t1);
9315 return true;
9318 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9320 TCGv_i32 tmp;
9321 int width = a->widthm1 + 1;
9322 int shift = a->lsb;
9324 if (!ENABLE_ARCH_6T2) {
9325 return false;
9327 if (shift + width > 32) {
9328 /* UNPREDICTABLE; we choose to UNDEF */
9329 unallocated_encoding(s);
9330 return true;
9333 tmp = load_reg(s, a->rn);
9334 if (u) {
9335 tcg_gen_extract_i32(tmp, tmp, shift, width);
9336 } else {
9337 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9339 store_reg(s, a->rd, tmp);
9340 return true;
9343 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9345 return op_bfx(s, a, false);
9348 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9350 return op_bfx(s, a, true);
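/*
 * UBFX/SBFX map directly onto the TCG (s)extract ops above:
 * UBFX Rd, Rn, #lsb, #width computes (Rn >> lsb) & ((1 << width) - 1),
 * and SBFX additionally sign-extends the extracted field.
 */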
9353 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9355 TCGv_i32 tmp;
9356 int msb = a->msb, lsb = a->lsb;
9357 int width;
9359 if (!ENABLE_ARCH_6T2) {
9360 return false;
9362 if (msb < lsb) {
9363 /* UNPREDICTABLE; we choose to UNDEF */
9364 unallocated_encoding(s);
9365 return true;
9368 width = msb + 1 - lsb;
9369 if (a->rn == 15) {
9370 /* BFC */
9371 tmp = tcg_const_i32(0);
9372 } else {
9373 /* BFI */
9374 tmp = load_reg(s, a->rn);
9376 if (width != 32) {
9377 TCGv_i32 tmp2 = load_reg(s, a->rd);
9378 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9379 tcg_temp_free_i32(tmp2);
9381 store_reg(s, a->rd, tmp);
9382 return true;
9385 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9387 unallocated_encoding(s);
9388 return true;
9392 * Parallel addition and subtraction
9395 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9396 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9398 TCGv_i32 t0, t1;
9400 if (s->thumb
9401 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9402 : !ENABLE_ARCH_6) {
9403 return false;
9406 t0 = load_reg(s, a->rn);
9407 t1 = load_reg(s, a->rm);
9409 gen(t0, t0, t1);
9411 tcg_temp_free_i32(t1);
9412 store_reg(s, a->rd, t0);
9413 return true;
9416 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9417 void (*gen)(TCGv_i32, TCGv_i32,
9418 TCGv_i32, TCGv_ptr))
9420 TCGv_i32 t0, t1;
9421 TCGv_ptr ge;
9423 if (s->thumb
9424 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9425 : !ENABLE_ARCH_6) {
9426 return false;
9429 t0 = load_reg(s, a->rn);
9430 t1 = load_reg(s, a->rm);
9432 ge = tcg_temp_new_ptr();
9433 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9434 gen(t0, t0, t1, ge);
9436 tcg_temp_free_ptr(ge);
9437 tcg_temp_free_i32(t1);
9438 store_reg(s, a->rd, t0);
9439 return true;
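/*
 * Helper naming for the parallel add/sub group below: a leading S/U is
 * signed/unsigned modular arithmetic (these set the GE flags, hence the
 * _ge variant), Q/UQ are signed/unsigned saturating, and SH/UH are
 * signed/unsigned halving.  16 vs 8 is the element size; ASX/SAX are the
 * add-subtract / subtract-add exchange forms.
 */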
9442 #define DO_PAR_ADDSUB(NAME, helper) \
9443 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9445 return op_par_addsub(s, a, helper); \
9448 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9449 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9451 return op_par_addsub_ge(s, a, helper); \
9454 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9455 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9456 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9457 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9458 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9459 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9461 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9462 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9463 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9464 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9465 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9466 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9468 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9469 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9470 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9471 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9472 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9473 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9475 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9476 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9477 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9478 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9479 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9480 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9482 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9483 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9484 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9485 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9486 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9487 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9489 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9490 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9491 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9492 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9493 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9494 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9496 #undef DO_PAR_ADDSUB
9497 #undef DO_PAR_ADDSUB_GE
9500 * Packing, unpacking, saturation, and reversal
9503 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9505 TCGv_i32 tn, tm;
9506 int shift = a->imm;
9508 if (s->thumb
9509 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9510 : !ENABLE_ARCH_6) {
9511 return false;
9514 tn = load_reg(s, a->rn);
9515 tm = load_reg(s, a->rm);
9516 if (a->tb) {
9517 /* PKHTB */
9518 if (shift == 0) {
9519 shift = 31;
9521 tcg_gen_sari_i32(tm, tm, shift);
9522 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9523 } else {
9524 /* PKHBT */
9525 tcg_gen_shli_i32(tm, tm, shift);
9526 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9528 tcg_temp_free_i32(tm);
9529 store_reg(s, a->rd, tn);
9530 return true;
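/*
 * Concretely: PKHBT Rd, Rn, Rm, LSL #imm takes the bottom halfword from
 * Rn and the top halfword from (Rm << imm), while PKHTB Rd, Rn, Rm,
 * ASR #imm takes the top halfword from Rn and the bottom halfword from
 * the arithmetic shift of Rm, where imm == 0 encodes ASR #32 (hence the
 * shift == 0 -> 31 fixup above).
 */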
9533 static bool op_sat(DisasContext *s, arg_sat *a,
9534 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9536 TCGv_i32 tmp, satimm;
9537 int shift = a->imm;
9539 if (!ENABLE_ARCH_6) {
9540 return false;
9543 tmp = load_reg(s, a->rn);
9544 if (a->sh) {
9545 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9546 } else {
9547 tcg_gen_shli_i32(tmp, tmp, shift);
9550 satimm = tcg_const_i32(a->satimm);
9551 gen(tmp, cpu_env, tmp, satimm);
9552 tcg_temp_free_i32(satimm);
9554 store_reg(s, a->rd, tmp);
9555 return true;
9558 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9560 return op_sat(s, a, gen_helper_ssat);
9563 static bool trans_USAT(DisasContext *s, arg_sat *a)
9565 return op_sat(s, a, gen_helper_usat);
9568 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9570 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9571 return false;
9573 return op_sat(s, a, gen_helper_ssat16);
9576 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9578 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9579 return false;
9581 return op_sat(s, a, gen_helper_usat16);
9584 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9585 void (*gen_extract)(TCGv_i32, TCGv_i32),
9586 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9588 TCGv_i32 tmp;
9590 if (!ENABLE_ARCH_6) {
9591 return false;
9594 tmp = load_reg(s, a->rm);
9596 * TODO: In many cases we could do a shift instead of a rotate.
9597 * Combined with a simple extend, that becomes an extract.
9599 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9600 gen_extract(tmp, tmp);
9602 if (a->rn != 15) {
9603 TCGv_i32 tmp2 = load_reg(s, a->rn);
9604 gen_add(tmp, tmp, tmp2);
9605 tcg_temp_free_i32(tmp2);
9607 store_reg(s, a->rd, tmp);
9608 return true;
9611 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9613 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9616 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9618 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9621 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9623 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9624 return false;
9626 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9629 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9631 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9634 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9636 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9639 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9641 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9642 return false;
9644 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9647 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9649 TCGv_i32 t1, t2, t3;
9651 if (s->thumb
9652 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9653 : !ENABLE_ARCH_6) {
9654 return false;
9657 t1 = load_reg(s, a->rn);
9658 t2 = load_reg(s, a->rm);
9659 t3 = tcg_temp_new_i32();
9660 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9661 gen_helper_sel_flags(t1, t3, t1, t2);
9662 tcg_temp_free_i32(t3);
9663 tcg_temp_free_i32(t2);
9664 store_reg(s, a->rd, t1);
9665 return true;
9668 static bool op_rr(DisasContext *s, arg_rr *a,
9669 void (*gen)(TCGv_i32, TCGv_i32))
9671 TCGv_i32 tmp;
9673 tmp = load_reg(s, a->rm);
9674 gen(tmp, tmp);
9675 store_reg(s, a->rd, tmp);
9676 return true;
9679 static bool trans_REV(DisasContext *s, arg_rr *a)
9681 if (!ENABLE_ARCH_6) {
9682 return false;
9684 return op_rr(s, a, tcg_gen_bswap32_i32);
9687 static bool trans_REV16(DisasContext *s, arg_rr *a)
9689 if (!ENABLE_ARCH_6) {
9690 return false;
9692 return op_rr(s, a, gen_rev16);
9695 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9697 if (!ENABLE_ARCH_6) {
9698 return false;
9700 return op_rr(s, a, gen_revsh);
9703 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9705 if (!ENABLE_ARCH_6T2) {
9706 return false;
9708 return op_rr(s, a, gen_helper_rbit);
9712 * Signed multiply, signed and unsigned divide
9715 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9717 TCGv_i32 t1, t2;
9719 if (!ENABLE_ARCH_6) {
9720 return false;
9723 t1 = load_reg(s, a->rn);
9724 t2 = load_reg(s, a->rm);
9725 if (m_swap) {
9726 gen_swap_half(t2);
9728 gen_smul_dual(t1, t2);
9730 if (sub) {
9731 /* This subtraction cannot overflow. */
9732 tcg_gen_sub_i32(t1, t1, t2);
9733 } else {
9735 * This addition cannot overflow 32 bits; however it may
9736 * overflow considered as a signed operation, in which case
9737 * we must set the Q flag.
9739 gen_helper_add_setq(t1, cpu_env, t1, t2);
9741 tcg_temp_free_i32(t2);
9743 if (a->ra != 15) {
9744 t2 = load_reg(s, a->ra);
9745 gen_helper_add_setq(t1, cpu_env, t1, t2);
9746 tcg_temp_free_i32(t2);
9748 store_reg(s, a->rd, t1);
9749 return true;
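/*
 * op_smlad implements the dual 16x16 multiplies: with p1 = Rn[15:0] *
 * Rm[15:0] and p2 = Rn[31:16] * Rm[31:16] (both signed), SMLAD computes
 * Ra + p1 + p2 and SMLSD computes Ra + p1 - p2, setting Q if an addition
 * overflows; the X forms swap the halfwords of Rm first, and ra == 15
 * gives the non-accumulating SMUAD/SMUSD encodings.
 */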
9752 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9754 return op_smlad(s, a, false, false);
9757 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9759 return op_smlad(s, a, true, false);
9762 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9764 return op_smlad(s, a, false, true);
9767 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9769 return op_smlad(s, a, true, true);
9772 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9774 TCGv_i32 t1, t2;
9775 TCGv_i64 l1, l2;
9777 if (!ENABLE_ARCH_6) {
9778 return false;
9781 t1 = load_reg(s, a->rn);
9782 t2 = load_reg(s, a->rm);
9783 if (m_swap) {
9784 gen_swap_half(t2);
9786 gen_smul_dual(t1, t2);
9788 l1 = tcg_temp_new_i64();
9789 l2 = tcg_temp_new_i64();
9790 tcg_gen_ext_i32_i64(l1, t1);
9791 tcg_gen_ext_i32_i64(l2, t2);
9792 tcg_temp_free_i32(t1);
9793 tcg_temp_free_i32(t2);
9795 if (sub) {
9796 tcg_gen_sub_i64(l1, l1, l2);
9797 } else {
9798 tcg_gen_add_i64(l1, l1, l2);
9800 tcg_temp_free_i64(l2);
9802 gen_addq(s, l1, a->ra, a->rd);
9803 gen_storeq_reg(s, a->ra, a->rd, l1);
9804 tcg_temp_free_i64(l1);
9805 return true;
9808 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9810 return op_smlald(s, a, false, false);
9813 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9815 return op_smlald(s, a, true, false);
9818 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9820 return op_smlald(s, a, false, true);
9823 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9825 return op_smlald(s, a, true, true);
9828 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9830 TCGv_i32 t1, t2;
9832 if (s->thumb
9833 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9834 : !ENABLE_ARCH_6) {
9835 return false;
9838 t1 = load_reg(s, a->rn);
9839 t2 = load_reg(s, a->rm);
9840 tcg_gen_muls2_i32(t2, t1, t1, t2);
9842 if (a->ra != 15) {
9843 TCGv_i32 t3 = load_reg(s, a->ra);
9844 if (sub) {
9846 * For SMMLS, we need a 64-bit subtract: a non-zero product
9847 * lowpart produces a borrow into the high word, and we also need
9848 * the correct result lowpart for rounding.
9850 TCGv_i32 zero = tcg_const_i32(0);
9851 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9852 tcg_temp_free_i32(zero);
9853 } else {
9854 tcg_gen_add_i32(t1, t1, t3);
9856 tcg_temp_free_i32(t3);
9858 if (round) {
9860 * Adding 0x80000000 to the 64-bit quantity means that we have
9861 * carry into the high word when the low word has the msb set.
9863 tcg_gen_shri_i32(t2, t2, 31);
9864 tcg_gen_add_i32(t1, t1, t2);
9866 tcg_temp_free_i32(t2);
9867 store_reg(s, a->rd, t1);
9868 return true;
9871 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9873 return op_smmla(s, a, false, false);
9876 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9878 return op_smmla(s, a, true, false);
9881 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9883 return op_smmla(s, a, false, true);
9886 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9888 return op_smmla(s, a, true, true);
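/*
 * The rounding in op_smmla relies on the identity
 * ((hi:lo) + 0x80000000) >> 32 == hi + (lo >> 31): adding half of 2^32
 * before taking the top word gives round-to-nearest, and the
 * shift-and-add on the low word implements exactly that carry.
 */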
9891 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9893 TCGv_i32 t1, t2;
9895 if (s->thumb
9896 ? !dc_isar_feature(aa32_thumb_div, s)
9897 : !dc_isar_feature(aa32_arm_div, s)) {
9898 return false;
9901 t1 = load_reg(s, a->rn);
9902 t2 = load_reg(s, a->rm);
9903 if (u) {
9904 gen_helper_udiv(t1, t1, t2);
9905 } else {
9906 gen_helper_sdiv(t1, t1, t2);
9908 tcg_temp_free_i32(t2);
9909 store_reg(s, a->rd, t1);
9910 return true;
9913 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9915 return op_div(s, a, false);
9918 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9920 return op_div(s, a, true);
9924 * Block data transfer
9927 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9929 TCGv_i32 addr = load_reg(s, a->rn);
9931 if (a->b) {
9932 if (a->i) {
9933 /* pre increment */
9934 tcg_gen_addi_i32(addr, addr, 4);
9935 } else {
9936 /* pre decrement */
9937 tcg_gen_addi_i32(addr, addr, -(n * 4));
9939 } else if (!a->i && n != 1) {
9940 /* post decrement */
9941 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9944 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9946 * If the writeback is incrementing SP rather than
9947 * decrementing it, and the initial SP is below the
9948 * stack limit but the final written-back SP would
9949 * be above, then we must not perform any memory
9950 * accesses, but it is IMPDEF whether we generate
9951 * an exception. We choose to do so in this case.
9952 * At this point 'addr' is the lowest address, so
9953 * either the original SP (if incrementing) or our
9954 * final SP (if decrementing), so that's what we check.
9956 gen_helper_v8m_stackcheck(cpu_env, addr);
9959 return addr;
9962 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9963 TCGv_i32 addr, int n)
9965 if (a->w) {
9966 /* write back */
9967 if (!a->b) {
9968 if (a->i) {
9969 /* post increment */
9970 tcg_gen_addi_i32(addr, addr, 4);
9971 } else {
9972 /* post decrement */
9973 tcg_gen_addi_i32(addr, addr, -(n * 4));
9975 } else if (!a->i && n != 1) {
9976 /* pre decrement */
9977 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9979 store_reg(s, a->rn, addr);
9980 } else {
9981 tcg_temp_free_i32(addr);
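/*
 * For the block transfers, the pre helper above rewrites all four
 * addressing modes (IA/IB/DA/DB) so that 'addr' starts at the lowest
 * address to be accessed and the loop always walks upwards in steps of
 * 4; the post helper then reconstructs the architecturally defined
 * writeback value for Rn when W is set.
 */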
9985 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9987 int i, j, n, list, mem_idx;
9988 bool user = a->u;
9989 TCGv_i32 addr, tmp, tmp2;
9991 if (user) {
9992 /* STM (user) */
9993 if (IS_USER(s)) {
9994 /* Only usable in supervisor mode. */
9995 unallocated_encoding(s);
9996 return true;
10000 list = a->list;
10001 n = ctpop16(list);
10002 if (n < min_n || a->rn == 15) {
10003 unallocated_encoding(s);
10004 return true;
10007 addr = op_addr_block_pre(s, a, n);
10008 mem_idx = get_mem_index(s);
10010 for (i = j = 0; i < 16; i++) {
10011 if (!(list & (1 << i))) {
10012 continue;
10015 if (user && i != 15) {
10016 tmp = tcg_temp_new_i32();
10017 tmp2 = tcg_const_i32(i);
10018 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10019 tcg_temp_free_i32(tmp2);
10020 } else {
10021 tmp = load_reg(s, i);
10023 gen_aa32_st32(s, tmp, addr, mem_idx);
10024 tcg_temp_free_i32(tmp);
10026 /* No need to add after the last transfer. */
10027 if (++j != n) {
10028 tcg_gen_addi_i32(addr, addr, 4);
10032 op_addr_block_post(s, a, addr, n);
10033 return true;
10036 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
10038 /* BitCount(list) < 1 is UNPREDICTABLE */
10039 return op_stm(s, a, 1);
10042 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
10044 /* Writeback register in register list is UNPREDICTABLE for T32. */
10045 if (a->w && (a->list & (1 << a->rn))) {
10046 unallocated_encoding(s);
10047 return true;
10049 /* BitCount(list) < 2 is UNPREDICTABLE */
10050 return op_stm(s, a, 2);
10053 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10055 int i, j, n, list, mem_idx;
10056 bool loaded_base;
10057 bool user = a->u;
10058 bool exc_return = false;
10059 TCGv_i32 addr, tmp, tmp2, loaded_var;
10061 if (user) {
10062 /* LDM (user), LDM (exception return) */
10063 if (IS_USER(s)) {
10064 /* Only usable in supervisor mode. */
10065 unallocated_encoding(s);
10066 return true;
10068 if (extract32(a->list, 15, 1)) {
10069 exc_return = true;
10070 user = false;
10071 } else {
10072 /* LDM (user) does not allow writeback. */
10073 if (a->w) {
10074 unallocated_encoding(s);
10075 return true;
10080 list = a->list;
10081 n = ctpop16(list);
10082 if (n < min_n || a->rn == 15) {
10083 unallocated_encoding(s);
10084 return true;
10087 addr = op_addr_block_pre(s, a, n);
10088 mem_idx = get_mem_index(s);
10089 loaded_base = false;
10090 loaded_var = NULL;
10092 for (i = j = 0; i < 16; i++) {
10093 if (!(list & (1 << i))) {
10094 continue;
10097 tmp = tcg_temp_new_i32();
10098 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10099 if (user) {
10100 tmp2 = tcg_const_i32(i);
10101 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10102 tcg_temp_free_i32(tmp2);
10103 tcg_temp_free_i32(tmp);
10104 } else if (i == a->rn) {
10105 loaded_var = tmp;
10106 loaded_base = true;
10107 } else if (i == 15 && exc_return) {
10108 store_pc_exc_ret(s, tmp);
10109 } else {
10110 store_reg_from_load(s, i, tmp);
10113 /* No need to add after the last transfer. */
10114 if (++j != n) {
10115 tcg_gen_addi_i32(addr, addr, 4);
10119 op_addr_block_post(s, a, addr, n);
10121 if (loaded_base) {
10122 /* Note that we reject base == pc above. */
10123 store_reg(s, a->rn, loaded_var);
10126 if (exc_return) {
10127 /* Restore CPSR from SPSR. */
10128 tmp = load_cpu_field(spsr);
10129 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10130 gen_io_start();
10132 gen_helper_cpsr_write_eret(cpu_env, tmp);
10133 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10134 gen_io_end();
10136 tcg_temp_free_i32(tmp);
10137 /* Must exit loop to check unmasked IRQs */
10138 s->base.is_jmp = DISAS_EXIT;
10140 return true;
10143 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10146 * Writeback register in register list is UNPREDICTABLE
10147 * for ArchVersion() >= 7. Prior to v7, A32 would write
10148 * an UNKNOWN value to the base register.
10150 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10151 unallocated_encoding(s);
10152 return true;
10154 /* BitCount(list) < 1 is UNPREDICTABLE */
10155 return do_ldm(s, a, 1);
10158 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10160 /* Writeback register in register list is UNPREDICTABLE for T32. */
10161 if (a->w && (a->list & (1 << a->rn))) {
10162 unallocated_encoding(s);
10163 return true;
10165 /* BitCount(list) < 2 is UNPREDICTABLE */
10166 return do_ldm(s, a, 2);
10169 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10171 /* Writeback is conditional on the base register not being loaded. */
10172 a->w = !(a->list & (1 << a->rn));
10173 /* BitCount(list) < 1 is UNPREDICTABLE */
10174 return do_ldm(s, a, 1);
10178 * Branch, branch with link
10181 static bool trans_B(DisasContext *s, arg_i *a)
10183 gen_jmp(s, read_pc(s) + a->imm);
10184 return true;
10187 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10189 /* This has cond from encoding, required to be outside IT block. */
10190 if (a->cond >= 0xe) {
10191 return false;
10193 if (s->condexec_mask) {
10194 unallocated_encoding(s);
10195 return true;
10197 arm_skip_unless(s, a->cond);
10198 gen_jmp(s, read_pc(s) + a->imm);
10199 return true;
10202 static bool trans_BL(DisasContext *s, arg_i *a)
10204 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10205 gen_jmp(s, read_pc(s) + a->imm);
10206 return true;
10209 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10211 TCGv_i32 tmp;
10213 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10214 if (s->thumb && (a->imm & 2)) {
10215 return false;
10217 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10218 tmp = tcg_const_i32(!s->thumb);
10219 store_cpu_field(tmp, thumb);
10220 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10221 return true;
10224 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10226 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10227 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10228 return true;
10231 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10233 TCGv_i32 tmp = tcg_temp_new_i32();
10235 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10236 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10237 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10238 gen_bx(s, tmp);
10239 return true;
10242 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10244 TCGv_i32 tmp;
10246 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10247 if (!ENABLE_ARCH_5) {
10248 return false;
10250 tmp = tcg_temp_new_i32();
10251 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10252 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10253 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10254 gen_bx(s, tmp);
10255 return true;
10258 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10260 TCGv_i32 addr, tmp;
10262 tmp = load_reg(s, a->rm);
10263 if (half) {
10264 tcg_gen_add_i32(tmp, tmp, tmp);
10266 addr = load_reg(s, a->rn);
10267 tcg_gen_add_i32(addr, addr, tmp);
10269 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10270 half ? MO_UW | s->be_data : MO_UB);
10271 tcg_temp_free_i32(addr);
10273 tcg_gen_add_i32(tmp, tmp, tmp);
10274 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10275 store_reg(s, 15, tmp);
10276 return true;
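/*
 * TBB/TBH: the value loaded from the table at Rn + Rm (Rm scaled by 2
 * for TBH) is an unsigned byte/halfword count of halfwords, so it is
 * doubled and added to the PC to form the branch target.
 */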
10279 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10281 return op_tbranch(s, a, false);
10284 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10286 return op_tbranch(s, a, true);
10289 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10291 TCGv_i32 tmp = load_reg(s, a->rn);
10293 arm_gen_condlabel(s);
10294 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10295 tmp, 0, s->condlabel);
10296 tcg_temp_free_i32(tmp);
10297 gen_jmp(s, read_pc(s) + a->imm);
10298 return true;
10302 * Supervisor call - both T32 & A32 come here so we need to check
10303 * which mode we are in when checking for semihosting.
10306 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10308 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10310 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10311 #ifndef CONFIG_USER_ONLY
10312 !IS_USER(s) &&
10313 #endif
10314 (a->imm == semihost_imm)) {
10315 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10316 } else {
10317 gen_set_pc_im(s, s->base.pc_next);
10318 s->svc_imm = a->imm;
10319 s->base.is_jmp = DISAS_SWI;
10321 return true;
10325 * Unconditional system instructions
10328 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10330 static const int8_t pre_offset[4] = {
10331 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10333 static const int8_t post_offset[4] = {
10334 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10336 TCGv_i32 addr, t1, t2;
10338 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10339 return false;
10341 if (IS_USER(s)) {
10342 unallocated_encoding(s);
10343 return true;
10346 addr = load_reg(s, a->rn);
10347 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10349 /* Load PC into tmp and CPSR into tmp2. */
10350 t1 = tcg_temp_new_i32();
10351 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10352 tcg_gen_addi_i32(addr, addr, 4);
10353 t2 = tcg_temp_new_i32();
10354 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10356 if (a->w) {
10357 /* Base writeback. */
10358 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10359 store_reg(s, a->rn, addr);
10360 } else {
10361 tcg_temp_free_i32(addr);
10363 gen_rfe(s, t1, t2);
10364 return true;
10367 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10369 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10370 return false;
10372 gen_srs(s, a->mode, a->pu, a->w);
10373 return true;
10376 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10378 uint32_t mask, val;
10380 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10381 return false;
10383 if (IS_USER(s)) {
10384 /* Implemented as NOP in user mode. */
10385 return true;
10387 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10389 mask = val = 0;
10390 if (a->imod & 2) {
10391 if (a->A) {
10392 mask |= CPSR_A;
10394 if (a->I) {
10395 mask |= CPSR_I;
10397 if (a->F) {
10398 mask |= CPSR_F;
10400 if (a->imod & 1) {
10401 val |= mask;
10404 if (a->M) {
10405 mask |= CPSR_M;
10406 val |= a->mode;
10408 if (mask) {
10409 gen_set_psr_im(s, mask, 0, val);
10411 return true;
10414 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10416 TCGv_i32 tmp, addr, el;
10418 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10419 return false;
10421 if (IS_USER(s)) {
10422 /* Implemented as NOP in user mode. */
10423 return true;
10426 tmp = tcg_const_i32(a->im);
10427 /* FAULTMASK */
10428 if (a->F) {
10429 addr = tcg_const_i32(19);
10430 gen_helper_v7m_msr(cpu_env, addr, tmp);
10431 tcg_temp_free_i32(addr);
10433 /* PRIMASK */
10434 if (a->I) {
10435 addr = tcg_const_i32(16);
10436 gen_helper_v7m_msr(cpu_env, addr, tmp);
10437 tcg_temp_free_i32(addr);
10439 el = tcg_const_i32(s->current_el);
10440 gen_helper_rebuild_hflags_m32(cpu_env, el);
10441 tcg_temp_free_i32(el);
10442 tcg_temp_free_i32(tmp);
10443 gen_lookup_tb(s);
10444 return true;
10448 * Clear-Exclusive, Barriers
10451 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10453 if (s->thumb
10454 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10455 : !ENABLE_ARCH_6K) {
10456 return false;
10458 gen_clrex(s);
10459 return true;
10462 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10464 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10465 return false;
10467 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10468 return true;
10471 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10473 return trans_DSB(s, NULL);
10476 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10478 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10479 return false;
10482 * We need to break the TB after this insn to execute
10483 * self-modifying code correctly and also to take
10484 * any pending interrupts immediately.
10486 gen_goto_tb(s, 0, s->base.pc_next);
10487 return true;
10490 static bool trans_SB(DisasContext *s, arg_SB *a)
10492 if (!dc_isar_feature(aa32_sb, s)) {
10493 return false;
10496 * TODO: There is no speculation barrier opcode
10497 * for TCG; MB and end the TB instead.
10499 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10500 gen_goto_tb(s, 0, s->base.pc_next);
10501 return true;
10504 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10506 if (!ENABLE_ARCH_6) {
10507 return false;
10509 if (a->E != (s->be_data == MO_BE)) {
10510 gen_helper_setend(cpu_env);
10511 s->base.is_jmp = DISAS_UPDATE;
10513 return true;
10517 * Preload instructions
10518 * All are nops, contingent on the appropriate arch level.
10521 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10523 return ENABLE_ARCH_5TE;
10526 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10528 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10531 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10533 return ENABLE_ARCH_7;
10537 * If-then
10540 static bool trans_IT(DisasContext *s, arg_IT *a)
10542 int cond_mask = a->cond_mask;
10545 * No actual code generated for this insn, just setup state.
10547 * Combinations of firstcond and mask which set up a 0b1111
10548 * condition are UNPREDICTABLE; we take the CONSTRAINED
10549 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10550 * i.e. both meaning "execute always".
10552 s->condexec_cond = (cond_mask >> 4) & 0xe;
10553 s->condexec_mask = cond_mask & 0x1f;
10554 return true;
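/*
 * Note that trans_IT only records the IT state here; the conditional
 * execution itself is handled as the following insns are translated,
 * each getting a skip-unless branch while the condexec state is
 * advanced insn by insn.
 */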
10558 * Legacy decoder.
10561 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10563 unsigned int cond = insn >> 28;
10565 /* M variants do not implement ARM mode; this must raise the INVSTATE
10566 * UsageFault exception.
10568 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10569 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10570 default_exception_el(s));
10571 return;
10574 if (cond == 0xf) {
10575 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10576 * choose to UNDEF. In ARMv5 and above the space is used
10577 * for miscellaneous unconditional instructions.
10579 ARCH(5);
10581 /* Unconditional instructions. */
10582 /* TODO: Perhaps merge these into one decodetree output file. */
10583 if (disas_a32_uncond(s, insn) ||
10584 disas_vfp_uncond(s, insn) ||
10585 disas_neon_dp(s, insn) ||
10586 disas_neon_ls(s, insn) ||
10587 disas_neon_shared(s, insn)) {
10588 return;
10590 /* fall back to legacy decoder */
10592 if (((insn >> 25) & 7) == 1) {
10593 /* NEON Data processing. */
10594 if (disas_neon_data_insn(s, insn)) {
10595 goto illegal_op;
10597 return;
10599 if ((insn & 0x0f100000) == 0x04000000) {
10600 /* NEON load/store. */
10601 if (disas_neon_ls_insn(s, insn)) {
10602 goto illegal_op;
10604 return;
10606 if ((insn & 0x0e000f00) == 0x0c000100) {
10607 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10608 /* iWMMXt register transfer. */
10609 if (extract32(s->c15_cpar, 1, 1)) {
10610 if (!disas_iwmmxt_insn(s, insn)) {
10611 return;
10616 goto illegal_op;
10618 if (cond != 0xe) {
10619 /* If the condition is not 'always', generate a conditional
10620 jump to the next instruction */
10621 arm_skip_unless(s, cond);
10624 /* TODO: Perhaps merge these into one decodetree output file. */
10625 if (disas_a32(s, insn) ||
10626 disas_vfp(s, insn)) {
10627 return;
10629 /* fall back to legacy decoder */
10631 switch ((insn >> 24) & 0xf) {
10632 case 0xc:
10633 case 0xd:
10634 case 0xe:
10635 if (((insn >> 8) & 0xe) == 10) {
10636 /* VFP, but failed disas_vfp. */
10637 goto illegal_op;
10639 if (disas_coproc_insn(s, insn)) {
10640 /* Coprocessor. */
10641 goto illegal_op;
10643 break;
10644 default:
10645 illegal_op:
10646 unallocated_encoding(s);
10647 break;
10651 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10654 * Return true if this is a 16 bit instruction. We must be precise
10655 * about this (matching the decode).
10657 if ((insn >> 11) < 0x1d) {
10658 /* Definitely a 16-bit instruction */
10659 return true;
10662 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10663 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10664 * end up actually treating this as two 16-bit insns, though,
10665 * if it's half of a bl/blx pair that might span a page boundary.
10667 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10668 arm_dc_feature(s, ARM_FEATURE_M)) {
10669 /* Thumb2 cores (including all M profile ones) always treat
10670 * 32-bit insns as 32-bit.
10672 return false;
10675 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10676 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10677 * is not on the next page; we merge this into a 32-bit
10678 * insn.
10680 return false;
10682 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10683 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10684 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10685 * -- handle as single 16 bit insn
10687 return true;
10690 /* Translate a 32-bit thumb instruction. */
10691 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10694 * ARMv6-M supports a limited subset of Thumb2 instructions.
10695 * Other Thumb1 architectures allow only 32-bit
10696 * combined BL/BLX prefix and suffix.
10698 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10699 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10700 int i;
10701 bool found = false;
10702 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10703 0xf3b08040 /* dsb */,
10704 0xf3b08050 /* dmb */,
10705 0xf3b08060 /* isb */,
10706 0xf3e08000 /* mrs */,
10707 0xf000d000 /* bl */};
10708 static const uint32_t armv6m_mask[] = {0xffe0d000,
10709 0xfff0d0f0,
10710 0xfff0d0f0,
10711 0xfff0d0f0,
10712 0xffe0d000,
10713 0xf800d000};
10715 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10716 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10717 found = true;
10718 break;
10721 if (!found) {
10722 goto illegal_op;
10724 } else if ((insn & 0xf800e800) != 0xf000e800) {
10725 ARCH(6T2);
10728 if ((insn & 0xef000000) == 0xef000000) {
10730 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10731 * transform into
10732 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10734 uint32_t a32_insn = (insn & 0xe2ffffff) |
10735 ((insn & (1 << 28)) >> 4) | (1 << 28);
10737 if (disas_neon_dp(s, a32_insn)) {
10738 return;
10742 if ((insn & 0xff100000) == 0xf9000000) {
10744 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10745 * transform into
10746 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10748 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
10750 if (disas_neon_ls(s, a32_insn)) {
10751 return;
10756 * TODO: Perhaps merge these into one decodetree output file.
10757 * Note disas_vfp is written for a32 with cond field in the
10758 * top nibble. The t32 encoding requires 0xe in the top nibble.
10760 if (disas_t32(s, insn) ||
10761 disas_vfp_uncond(s, insn) ||
10762 disas_neon_shared(s, insn) ||
10763 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10764 return;
10766 /* fall back to legacy decoder */
10768 switch ((insn >> 25) & 0xf) {
10769 case 0: case 1: case 2: case 3:
10770 /* 16-bit instructions. Should never happen. */
10771 abort();
10772 case 6: case 7: case 14: case 15:
10773 /* Coprocessor. */
10774 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10775 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10776 if (extract32(insn, 24, 2) == 3) {
10777 goto illegal_op; /* op0 = 0b11 : unallocated */
10780 if (((insn >> 8) & 0xe) == 10 &&
10781 dc_isar_feature(aa32_fpsp_v2, s)) {
10782 /* FP, and the CPU supports it */
10783 goto illegal_op;
10784 } else {
10785 /* All other insns: NOCP */
10786 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10787 syn_uncategorized(),
10788 default_exception_el(s));
10790 break;
10792 if (((insn >> 24) & 3) == 3) {
10793 /* Translate into the equivalent ARM encoding. */
10794 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10795 if (disas_neon_data_insn(s, insn)) {
10796 goto illegal_op;
10798 } else if (((insn >> 8) & 0xe) == 10) {
10799 /* VFP, but failed disas_vfp. */
10800 goto illegal_op;
10801 } else {
10802 if (insn & (1 << 28))
10803 goto illegal_op;
10804 if (disas_coproc_insn(s, insn)) {
10805 goto illegal_op;
10808 break;
10809 case 12:
10810 if ((insn & 0x01100000) == 0x01000000) {
10811 if (disas_neon_ls_insn(s, insn)) {
10812 goto illegal_op;
10814 break;
10816 goto illegal_op;
10817 default:
10818 illegal_op:
10819 unallocated_encoding(s);
10823 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10825 if (!disas_t16(s, insn)) {
10826 unallocated_encoding(s);
10830 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10832 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10833 * (False positives are OK, false negatives are not.)
10834 * We know this is a Thumb insn, and our caller ensures we are
10835 * only called if dc->base.pc_next is less than 4 bytes from the page
10836 * boundary, so we cross the page if the first 16 bits indicate
10837 * that this is a 32 bit insn.
10839 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10841 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cs->env_ptr;
    ARMCPU *cpu = env_archcpu(env);
    uint32_t tb_flags = dc->base.tb->flags;
    uint32_t condexec, core_mmu_idx;

    dc->isar = &cpu->isar;
    dc->condjmp = 0;

    dc->aarch64 = 0;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
    condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
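    /*
     * The IT bits come packed from the TB flags as cond:mask (4 bits each);
     * keep them split in the DisasContext, with the mask pre-shifted left
     * by one, which is the form the rest of the translator works with.
     */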
    dc->condexec_mask = (condexec & 0xf) << 1;
    dc->condexec_cond = condexec >> 4;

    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);

    if (arm_feature(env, ARM_FEATURE_M)) {
        dc->vfp_enabled = 1;
        dc->be_data = MO_TE;
        dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
        dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
            regime_is_secure(env, dc->mmu_idx);
        dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
        dc->v8m_fpccr_s_wrong =
            FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
        dc->v7m_new_fp_ctxt_needed =
            FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
        dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
    } else {
        dc->be_data =
            FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
        dc->debug_target_el =
            FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
        dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
        dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
        dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
        dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
            dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
        } else {
            dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
            dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
        }
    }
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
    dc->is_ldex = false;

    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;

    /* If architectural single step active, limit to 1.  */
    if (is_singlestepping(dc)) {
        dc->base.max_insns = 1;
    }

    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
       to those left on the page.  */
    if (!dc->thumb) {
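        /*
         * -(pc_first | TARGET_PAGE_MASK) is the number of bytes left on the
         * current page, so dividing by 4 bounds max_insns to the number of
         * fixed-size A32 insns that fit before the page boundary.
         */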
        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
        dc->base.max_insns = MIN(dc->base.max_insns, bound);
    }

    cpu_V0 = tcg_temp_new_i64();
    cpu_V1 = tcg_temp_new_i64();
    /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
    cpu_M0 = tcg_temp_new_i64();
}

static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    /* A note on handling of the condexec (IT) bits:
     *
     * We want to avoid the overhead of having to write the updated condexec
     * bits back to the CPUARMState for every instruction in an IT block. So:
     * (1) if the condexec bits are not already zero then we write
     * zero back into the CPUARMState now. This avoids complications trying
     * to do it at the end of the block. (For example if we don't do this
     * it's hard to identify whether we can safely skip writing condexec
     * at the end of the TB, which we definitely want to do for the case
     * where a TB doesn't do anything with the IT state at all.)
     * (2) if we are going to leave the TB then we call gen_set_condexec()
     * which will write the correct value into CPUARMState if zero is wrong.
     * This is done both for leaving the TB at the end, and for leaving
     * it because of an exception we know will happen, which is done in
     * gen_exception_insn(). The latter is necessary because we need to
     * leave the TB with the PC/IT state just prior to execution of the
     * instruction which caused the exception.
     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
     * then the CPUARMState will be wrong and we need to reset it.
     * This is handled in the same way as restoration of the
     * PC in these situations; we save the value of the condexec bits
     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
     * then uses this to restore them after an exception.
     *
     * Note that there are no instructions which can read the condexec
     * bits, and none which can write non-static values to them, so
     * we don't need to care about whether CPUARMState is correct in the
     * middle of a TB.
     */

    /* Reset the conditional execution bits immediately. This avoids
       complications trying to do it at the end of the block.  */
    if (dc->condexec_mask || dc->condexec_cond) {
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
        store_cpu_field(tmp, condexec_bits);
    }
}

static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
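
    /*
     * The words recorded here (PC, the IT bits packed back into cond:mask
     * form, and the syndrome word) are what restore_state_to_opc() reads
     * back as data[0..2] after an exception.
     */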
    tcg_gen_insn_start(dc->base.pc_next,
                       (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
                       0);
    dc->insn_start = tcg_last_op();
}

static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
                                    const CPUBreakpoint *bp)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (bp->flags & BP_CPU) {
        gen_set_condexec(dc);
        gen_set_pc_im(dc, dc->base.pc_next);
        gen_helper_check_breakpoints(cpu_env);
        /* End the TB early; it's likely not going to be executed */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else {
        gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
        /* The address covered by the breakpoint must be
           included in [tb->pc, tb->pc + tb->size) in order
           for it to be properly cleared -- thus we
           increment the PC here so that the logic setting
           tb->size below does the right thing.  */
        /* TODO: Advance PC by correct instruction length to
         * avoid disassembler error messages */
        dc->base.pc_next += 2;
        dc->base.is_jmp = DISAS_NORETURN;
    }

    return true;
}

static bool arm_pre_translate_insn(DisasContext *dc)
{
#ifdef CONFIG_USER_ONLY
    /* Intercept jump to the magic kernel page.  */
    if (dc->base.pc_next >= 0xffff0000) {
        /* We always get here via a jump, so know we are not in a
           conditional execution block.  */
        gen_exception_internal(EXCP_KERNEL_TRAP);
        dc->base.is_jmp = DISAS_NORETURN;
        return true;
    }
#endif

    if (dc->ss_active && !dc->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(dc->base.num_insns == 1);
        gen_swstep_exception(dc, 0, 0);
        dc->base.is_jmp = DISAS_NORETURN;
        return true;
    }

    return false;
}

static void arm_post_translate_insn(DisasContext *dc)
{
    if (dc->condjmp && !dc->base.is_jmp) {
        gen_set_label(dc->condlabel);
        dc->condjmp = 0;
    }
    translator_loop_temp_check(&dc->base);
}

static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    unsigned int insn;

    if (arm_pre_translate_insn(dc)) {
        return;
    }

    dc->pc_curr = dc->base.pc_next;
    insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
    dc->insn = insn;
    dc->base.pc_next += 4;
    disas_arm_insn(dc, insn);

    arm_post_translate_insn(dc);

    /* ARM is a fixed-length ISA.  We performed the cross-page check
       in init_disas_context by adjusting max_insns.  */
}

static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
{
    /* Return true if this Thumb insn is always unconditional,
     * even inside an IT block. This is true of only a very few
     * instructions: BKPT, HLT, and SG.
     *
     * A larger class of instructions are UNPREDICTABLE if used
     * inside an IT block; we do not need to detect those here, because
     * what we do by default (perform the cc check and update the IT
     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
     * choice for those situations.
     *
     * insn is either a 16-bit or a 32-bit instruction; the two are
     * distinguishable because for the 16-bit case the top 16 bits
     * are zeroes, and that isn't a valid 32-bit encoding.
     */
    if ((insn & 0xffffff00) == 0xbe00) {
        /* BKPT */
        return true;
    }

    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
        !arm_dc_feature(s, ARM_FEATURE_M)) {
        /* HLT: v8A only. This is unconditional even when it is going to
         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
         * For v7 cores this was a plain old undefined encoding and so
         * honours its cc check. (We might be using the encoding as
         * a semihosting trap, but we don't change the cc check behaviour
         * on that account, because a debugger connected to a real v7A
         * core and emulating semihosting traps by catching the UNDEF
         * exception would also only see cases where the cc check passed.
         * No guest code should be trying to do a HLT semihosting trap
         * in an IT block anyway.
         */
        return true;
    }

    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
        arm_dc_feature(s, ARM_FEATURE_M)) {
        /* SG: v8M only */
        return true;
    }

    return false;
}

static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint32_t insn;
    bool is_16bit;

    if (arm_pre_translate_insn(dc)) {
        return;
    }

    dc->pc_curr = dc->base.pc_next;
    insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
    dc->base.pc_next += 2;
    if (!is_16bit) {
        uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);

        insn = insn << 16 | insn2;
        dc->base.pc_next += 2;
    }
    dc->insn = insn;

    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
        uint32_t cond = dc->condexec_cond;

        /*
         * Conditionally skip the insn. Note that both 0xe and 0xf mean
         * "always"; 0xf is not "never".
         */
        if (cond < 0x0e) {
            arm_skip_unless(dc, cond);
        }
    }

    if (is_16bit) {
        disas_thumb_insn(dc, insn);
    } else {
        disas_thumb2_insn(dc, insn);
    }

    /* Advance the Thumb condexec condition.  */
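    /*
     * The next condition's low bit is taken from the top bit of the mask
     * and the mask shifts up by one; once the mask is empty the IT block
     * has finished and the stored condition is cleared.
     */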
    if (dc->condexec_mask) {
        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
                             ((dc->condexec_mask >> 4) & 1));
        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
        if (dc->condexec_mask == 0) {
            dc->condexec_cond = 0;
        }
    }

    arm_post_translate_insn(dc);

    /* Thumb is a variable-length ISA.  Stop translation when the next insn
     * will touch a new page.  This ensures that prefetch aborts occur at
     * the right place.
     *
     * We want to stop the TB if the next insn starts in a new page,
     * or if it spans between this page and the next. This means that
     * if we're looking at the last halfword in the page we need to
     * see if it's a 16-bit Thumb insn (which will fit in this TB)
     * or a 32-bit Thumb insn (which won't).
     * This is to avoid generating a silly TB with a single 16-bit insn
     * in it at the end of this page (which would execute correctly
     * but isn't very efficient).
     */
    if (dc->base.is_jmp == DISAS_NEXT
        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
                && insn_crosses_page(env, dc)))) {
        dc->base.is_jmp = DISAS_TOO_MANY;
    }
}

static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
        /* FIXME: This can theoretically happen with self-modifying code. */
        cpu_abort(cpu, "IO on conditional branch instruction");
    }

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    gen_set_condexec(dc);
    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
        /* Exception return branches need some special case code at the
         * end of the TB, which is complex enough that it has to
         * handle the single-step vs not and the condition-failed
         * insn codepath itself.
         */
        gen_bx_excret_final_code(dc);
    } else if (unlikely(is_singlestepping(dc))) {
        /* Unconditional and "condition passed" instruction codepath. */
        switch (dc->base.is_jmp) {
        case DISAS_SWI:
            gen_ss_advance(dc);
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
            break;
        case DISAS_HVC:
            gen_ss_advance(dc);
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_ss_advance(dc);
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
        case DISAS_UPDATE:
            gen_set_pc_im(dc, dc->base.pc_next);
            /* fall through */
        default:
            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
            gen_singlestep_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middle of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, dc->base.pc_next);
            break;
        case DISAS_JUMP:
            gen_goto_ptr();
            break;
        case DISAS_UPDATE:
            gen_set_pc_im(dc, dc->base.pc_next);
            /* fall through */
        default:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_NORETURN:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
        {
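            /*
             * Pass the WFI helper the insn length: dc->insn holds a 16-bit
             * Thumb insn with the top halfword clear, whereas a 32-bit Thumb
             * insn has bit 31 set here, hence 2 vs 4; A32 is always 4 bytes.
             */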
            TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
                                          !(dc->insn & (1U << 31))) ? 2 : 4);

            gen_helper_wfi(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
        case DISAS_WFE:
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_helper_yield(cpu_env);
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
            break;
        case DISAS_HVC:
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        }
    }

    if (dc->condjmp) {
        /* "Condition failed" instruction codepath for the branch/trap insn */
        gen_set_label(dc->condlabel);
        gen_set_condexec(dc);
        if (unlikely(is_singlestepping(dc))) {
            gen_set_pc_im(dc, dc->base.pc_next);
            gen_singlestep_exception(dc);
        } else {
            gen_goto_tb(dc, 1, dc->base.pc_next);
        }
    }
}

static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}

static const TranslatorOps arm_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .breakpoint_check   = arm_tr_breakpoint_check,
    .translate_insn     = arm_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};

static const TranslatorOps thumb_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .breakpoint_check   = arm_tr_breakpoint_check,
    .translate_insn     = thumb_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};

/* generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
{
    DisasContext dc = { };
    const TranslatorOps *ops = &arm_translator_ops;
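
    /*
     * The A32 and Thumb ops above differ only in translate_insn; AArch64
     * state switches to the separate aarch64_translator_ops instead.
     */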
    if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
        ops = &thumb_translator_ops;
    }
#ifdef TARGET_AARCH64
    if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
        ops = &aarch64_translator_ops;
    }
#endif

    translator_loop(ops, &dc.base, cpu, tb, max_insns);
}

void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
                          target_ulong *data)
{
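    /*
     * data[] holds the values recorded at insn_start: data[0] is the PC,
     * data[1] the packed IT bits (AArch32 only), data[2] the syndrome word.
     */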
    if (is_a64(env)) {
        env->pc = data[0];
        env->condexec_bits = 0;
        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
    } else {
        env->regs[15] = data[0];
        env->condexec_bits = data[1];
        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;