target/arm: Convert Neon 3-reg-diff long multiplies
[qemu/ar7.git] / target/arm/translate.c
1 /*
2 * ARM translation
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
38 #include "exec/log.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
54 #include "translate.h"
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 #include "exec/gen-icount.h"
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
 78 /* Function prototypes for gen_ functions for fixed point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
84 int i;
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
101 a64_translate_init();
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
131 return;
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
139 return;
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
147 static inline int get_a32_user_mem_index(DisasContext *s)
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
182 static inline TCGv_i32 load_cpu_offset(int offset)
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
203 return s->pc_curr + (s->thumb ? 4 : 8);
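/*
 * Illustration: an AArch32 instruction that reads the PC observes its
 * own address plus 8 in ARM state or plus 4 in Thumb state.  For
 * example, an ARM-state "ADD r0, pc, #0" at 0x8000 writes 0x8008 to
 * r0, while the equivalent Thumb encoding writes 0x8004.
 */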
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
231 TCGv_i32 tmp = tcg_temp_new_i32();
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
238 return tmp;
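/*
 * Example: a Thumb LDR (literal) at 0x1002 sees an architectural PC of
 * 0x1006, which is aligned down to 0x1004 before the offset is added.
 * In ARM state the PC read value is already word aligned, so the
 * masking above only has a visible effect for Thumb encodings.
 */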
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
271 #endif
272 store_reg(s, 13, var);
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
294 static void gen_exception_internal(int excp)
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
303 static void gen_step_complete_exception(DisasContext *s)
 305 /* We just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
319 static void gen_singlestep_exception(DisasContext *s)
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
332 static inline bool is_singlestepping(DisasContext *s)
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
340 return s->base.singlestep_enabled || s->ss_active;
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
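/*
 * On return, a holds the signed product of the two low halfwords and
 * b holds the signed product of the two high halfwords; callers in
 * the SMUAD/SMUSD/SMLAD family then add or subtract the two products
 * as the instruction requires.
 */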
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
396 return ret;
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
414 return ret;
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
420 tcg_gen_rotri_i32(var, var, 16);
 423 /* Dual 16-bit add. Result is placed in dest; t0 and t1 are clobbered.
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
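/*
 * Why the trick above works: clearing bit 15 of both operands means
 * the low-halfword addition cannot carry into bit 16, so the two
 * halfword sums stay independent.  Bit 15 of a sum is a15 ^ b15 ^
 * carry-in, and the masked add still produces the correct carry-in at
 * bit 15, so XORing the saved (a15 ^ b15) back in restores the true
 * bit 15 of the low halfword result.
 */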
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
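/*
 * Flag encoding used here: NF and ZF hold the 32-bit result (N is the
 * sign of NF, Z is "ZF == 0"), CF holds the carry out of bit 31, and
 * VF holds overflow in its sign bit.  Overflow occurs when both
 * operands have the same sign but the result does not, hence
 * (res ^ t0) & ~(t0 ^ t1).  For example 0x7fffffff + 1 = 0x80000000
 * sets V and leaves C clear.
 */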
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
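/*
 * For subtraction the ARM C flag means "no borrow", i.e. C is set when
 * t0 >= t1 as unsigned values, which is exactly what the GEU setcond
 * computes.  Signed overflow occurs when the operands have different
 * signs and the result's sign differs from t0, hence
 * (res ^ t0) & (t0 ^ t1).  For example 0 - 1 = 0xffffffff with both C
 * and V clear.
 */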
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
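/*
 * Register-specified shifts only use the bottom byte of the shift
 * register.  The movcond in GEN_SHIFT makes LSL/LSR by more than 31
 * produce zero (the operand is replaced by 0 before shifting), while
 * gen_sar clamps the amount to 31 so shifts of 32 or more replicate
 * the sign bit, matching the ARM shift pseudocode.  The flag-setting
 * forms go through the *_cc helpers instead.
 */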
561 static void shifter_out_im(TCGv_i32 var, int shift)
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
641 tcg_temp_free_i32(shift);
645 * Generate a conditional based on ARM condition code cc.
 646 * This is common between ARM and AArch64 targets.
648 void arm_test_cc(DisasCompare *cmp, int cc)
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
734 void arm_free_cc(DisasCompare *cmp)
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
746 void arm_gen_test_cc(int cc, TCGLabel *label)
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
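/*
 * Illustrative sketch of a typical caller: to make an instruction
 * conditional, branch past its generated body when the condition
 * fails by testing the inverted condition code:
 *
 *     TCGLabel *skip = gen_new_label();
 *     arm_gen_test_cc(cond ^ 1, skip);     (taken when cond is false)
 *     ...generate the conditional body...
 *     gen_set_label(skip);
 */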
754 static inline void gen_set_condexec(DisasContext *s)
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
766 tcg_gen_movi_i32(cpu_R[15], val);
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
811 #endif
814 static inline void gen_bx_excret_final_code(DisasContext *s)
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
852 static inline void gen_bxns(DisasContext *s, int rm)
854 TCGv_i32 var = load_reg(s, rm);
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
871 static inline void gen_blxns(DisasContext *s, int rm)
873 TCGv_i32 var = load_reg(s, rm);
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
933 return addr;
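/*
 * Worked example of the BE32 address adjustment above: with SCTLR.B
 * set, each word is held byte-reversed in memory, so a byte access at
 * address A is performed at A ^ 3 and a halfword access at A ^ 2,
 * while word accesses are unchanged.  64-bit accesses are handled by
 * rotating the data instead; see gen_aa32_frob64 below.
 */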
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
939 TCGv addr;
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
954 TCGv addr;
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1017 tcg_temp_free(addr);
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1051 static inline void gen_smc(DisasContext *s)
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1056 TCGv_i32 tmp;
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1085 TCGv_i32 tcg_syn;
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1095 static void unallocated_encoding(DisasContext *s)
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1109 static inline void gen_hlt(DisasContext *s, int imm)
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1132 unallocated_encoding(s);
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1159 return ofs;
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
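/*
 * Example of the big-endian-host adjustment above: with byte-sized
 * elements, element 0 lives at byte offset 7 of its 8-byte unit on a
 * big-endian host, element 1 at offset 6, and so on.  XORing the
 * little-endian offset with (8 - element_size) produces exactly that
 * mapping while leaving whole 64-bit elements untouched.
 */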
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1248 long offset = neon_element_offset(reg, ele, size);
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1267 long offset = neon_element_offset(reg, ele, size);
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1314 #define ARM_CP_RW_BIT (1 << 20)
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1345 iwmmxt_store_reg(cpu_M0, rn);
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1350 iwmmxt_load_reg(cpu_M0, rn);
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1451 static void gen_op_iwmmxt_set_mup(void)
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459 static void gen_op_iwmmxt_set_cup(void)
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
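/*
 * Addressing-mode summary for the decode above: the 8-bit immediate is
 * scaled by 4 when bit 8 of the insn is set.  Bit 24 selects
 * pre-indexed addressing (bit 21 then enables writeback), bit 21 alone
 * selects post-indexed addressing, and bit 23 chooses whether the
 * offset is added or subtracted; the remaining "no index, subtract"
 * form is rejected as an invalid encoding.
 */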
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1561 return 0;
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1623 tcg_temp_free_i32(tmp);
1625 tcg_temp_free_i32(addr);
1626 return 0;
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2540 return 0;
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544    (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2556 if (acc != 0)
2557 return 1;
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2594 if (acc != 0)
2595 return 1;
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2606 return 0;
2609 return 1;
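/*
 * Note that acc0 above is the 40-bit XScale accumulator: the MIA* forms
 * accumulate into it via the shared iwmmxt multiply-add helpers, and MRA
 * masks the upper word down to 8 bits ((1 << (40 - 32)) - 1) so only 40
 * bits are ever observable.  Only acc0 is implemented; acc1-acc7 UNDEF.
 */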
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
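/*
 * For example, VFP_DREG_D() assembles the destination double register from
 * the 4-bit Vd field at insn[15:12] plus the D bit at insn[22]: with 32
 * D registers the result is (D << 4) | Vd, i.e. one of d0..d31.  Without
 * aa32_simd_r32 only d0..d15 exist, so a set D bit makes the caller
 * return 1 (UNDEF).
 */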
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
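/*
 * These duplicate one 16-bit lane across a 32-bit value: e.g. for an input
 * of 0xAAAABBBB, gen_neon_dup_low16() produces 0xBBBBBBBB and
 * gen_neon_dup_high16() produces 0xAAAAAAAA.  They are used below to
 * broadcast a 16-bit scalar operand to both halves of a word.
 */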
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
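/*
 * Roughly: direct TB chaining via goto_tb is only used while the branch
 * destination stays on the same guest page as the current TB (its start
 * page or the page of the current insn), so that page-level TB
 * invalidation also covers the direct links; other branches fall back to
 * the run-time TB lookup in gen_goto_ptr() below.
 */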
2655 static void gen_goto_ptr(void)
2657 tcg_gen_lookup_and_goto_ptr();
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2674 s->base.is_jmp = DISAS_NORETURN;
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
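/*
 * gen_mulxy() implements the signed 16 x 16 -> 32 multiply used by the
 * SMULxy/SMLAxy family: x and y select the top (1) or bottom (0) halfword
 * of t0 and t1 respectively, e.g. x == 1, y == 0 multiplies the high half
 * of t0 by the sign-extended low half of t1.
 */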
2701 /* Return the mask of PSR bits set by a MSR instruction. */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2704 uint32_t mask = 0;
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2731 return mask;
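/*
 * The four flag bits correspond to the MSR field mask c/x/s/f, one byte
 * each from bit 0 upwards: e.g. flags == 0b1001 ("fc") selects 0xff0000ff
 * before the validity, execution-state and privilege masking above is
 * applied.
 */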
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2787 int exc_target = default_exception_el(s);
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2886 exc_target = 3;
2887 goto undef;
2889 break;
2890 case ARM_CPU_MODE_HYP:
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2901 break;
2902 default:
2903 break;
2906 return true;
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
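/*
 * Example: "MRS r0, SPSR_fiq" reaches this decode with r == 1 and
 * sysm == 0xe, giving tgtmode ARM_CPU_MODE_FIQ and regno 16 (the arbitrary
 * number we assign to an SPSR); the mode-legality checks above then decide
 * whether the access is emitted or an exception is taken.
 */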
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3014 static TCGv_i32 neon_load_scratch(int scratch)
3016 TCGv_i32 tmp = tcg_temp_new_i32();
3017 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3018 return tmp;
3021 static void neon_store_scratch(int scratch, TCGv_i32 var)
3023 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3024 tcg_temp_free_i32(var);
3027 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3029 TCGv_i32 tmp;
3030 if (size == 1) {
3031 tmp = neon_load_reg(reg & 7, reg >> 4);
3032 if (reg & 8) {
3033 gen_neon_dup_high16(tmp);
3034 } else {
3035 gen_neon_dup_low16(tmp);
3037 } else {
3038 tmp = neon_load_reg(reg & 15, reg >> 4);
3040 return tmp;
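/*
 * Here 'reg' packs both the D register and the scalar index: for a 16-bit
 * scalar, bits [2:0] give the register, bit 4 the 32-bit word and bit 3
 * the half within it (broadcast via the dup helpers above); for a 32-bit
 * scalar, bits [3:0] give the register and bit 4 the word.
 */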
3043 static int gen_neon_unzip(int rd, int rm, int size, int q)
3045 TCGv_ptr pd, pm;
3047 if (!q && size == 2) {
3048 return 1;
3050 pd = vfp_reg_ptr(true, rd);
3051 pm = vfp_reg_ptr(true, rm);
3052 if (q) {
3053 switch (size) {
3054 case 0:
3055 gen_helper_neon_qunzip8(pd, pm);
3056 break;
3057 case 1:
3058 gen_helper_neon_qunzip16(pd, pm);
3059 break;
3060 case 2:
3061 gen_helper_neon_qunzip32(pd, pm);
3062 break;
3063 default:
3064 abort();
3066 } else {
3067 switch (size) {
3068 case 0:
3069 gen_helper_neon_unzip8(pd, pm);
3070 break;
3071 case 1:
3072 gen_helper_neon_unzip16(pd, pm);
3073 break;
3074 default:
3075 abort();
3078 tcg_temp_free_ptr(pd);
3079 tcg_temp_free_ptr(pm);
3080 return 0;
3083 static int gen_neon_zip(int rd, int rm, int size, int q)
3085 TCGv_ptr pd, pm;
3087 if (!q && size == 2) {
3088 return 1;
3090 pd = vfp_reg_ptr(true, rd);
3091 pm = vfp_reg_ptr(true, rm);
3092 if (q) {
3093 switch (size) {
3094 case 0:
3095 gen_helper_neon_qzip8(pd, pm);
3096 break;
3097 case 1:
3098 gen_helper_neon_qzip16(pd, pm);
3099 break;
3100 case 2:
3101 gen_helper_neon_qzip32(pd, pm);
3102 break;
3103 default:
3104 abort();
3106 } else {
3107 switch (size) {
3108 case 0:
3109 gen_helper_neon_zip8(pd, pm);
3110 break;
3111 case 1:
3112 gen_helper_neon_zip16(pd, pm);
3113 break;
3114 default:
3115 abort();
3118 tcg_temp_free_ptr(pd);
3119 tcg_temp_free_ptr(pm);
3120 return 0;
3123 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3125 TCGv_i32 rd, tmp;
3127 rd = tcg_temp_new_i32();
3128 tmp = tcg_temp_new_i32();
3130 tcg_gen_shli_i32(rd, t0, 8);
3131 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3132 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3133 tcg_gen_or_i32(rd, rd, tmp);
3135 tcg_gen_shri_i32(t1, t1, 8);
3136 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3137 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3138 tcg_gen_or_i32(t1, t1, tmp);
3139 tcg_gen_mov_i32(t0, rd);
3141 tcg_temp_free_i32(tmp);
3142 tcg_temp_free_i32(rd);
3145 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3147 TCGv_i32 rd, tmp;
3149 rd = tcg_temp_new_i32();
3150 tmp = tcg_temp_new_i32();
3152 tcg_gen_shli_i32(rd, t0, 16);
3153 tcg_gen_andi_i32(tmp, t1, 0xffff);
3154 tcg_gen_or_i32(rd, rd, tmp);
3155 tcg_gen_shri_i32(t1, t1, 16);
3156 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3157 tcg_gen_or_i32(t1, t1, tmp);
3158 tcg_gen_mov_i32(t0, rd);
3160 tcg_temp_free_i32(tmp);
3161 tcg_temp_free_i32(rd);
3164 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3166 switch (size) {
3167 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3168 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3169 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3170 default: abort();
3174 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3176 switch (size) {
3177 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3178 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3179 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3180 default: abort();
3184 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3186 switch (size) {
3187 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3188 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3189 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3190 default: abort();
3194 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3196 switch (size) {
3197 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3198 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3199 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3200 default: abort();
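/*
 * The narrowing helpers above take a 64-bit vector of double-width
 * elements down to 32 bits of half-width elements: plain truncation,
 * signed or unsigned saturation (the VQMOVN flavours), and "unarrow",
 * which saturates signed input to the unsigned range (VQMOVUN-style).
 */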
3204 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3206 if (u) {
3207 switch (size) {
3208 case 0: gen_helper_neon_widen_u8(dest, src); break;
3209 case 1: gen_helper_neon_widen_u16(dest, src); break;
3210 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3211 default: abort();
3213 } else {
3214 switch (size) {
3215 case 0: gen_helper_neon_widen_s8(dest, src); break;
3216 case 1: gen_helper_neon_widen_s16(dest, src); break;
3217 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3218 default: abort();
3221 tcg_temp_free_i32(src);
3224 static inline void gen_neon_addl(int size)
3226 switch (size) {
3227 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3228 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3229 case 2: tcg_gen_add_i64(CPU_V001); break;
3230 default: abort();
3234 static inline void gen_neon_negl(TCGv_i64 var, int size)
3236 switch (size) {
3237 case 0: gen_helper_neon_negl_u16(var, var); break;
3238 case 1: gen_helper_neon_negl_u32(var, var); break;
3239 case 2:
3240 tcg_gen_neg_i64(var, var);
3241 break;
3242 default: abort();
3246 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3248 switch (size) {
3249 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3250 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3251 default: abort();
3255 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3256 int size, int u)
3258 TCGv_i64 tmp;
3260 switch ((size << 1) | u) {
3261 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3262 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3263 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3264 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3265 case 4:
3266 tmp = gen_muls_i64_i32(a, b);
3267 tcg_gen_mov_i64(dest, tmp);
3268 tcg_temp_free_i64(tmp);
3269 break;
3270 case 5:
3271 tmp = gen_mulu_i64_i32(a, b);
3272 tcg_gen_mov_i64(dest, tmp);
3273 tcg_temp_free_i64(tmp);
3274 break;
3275 default: abort();
3278 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3279 Don't forget to clean them now. */
3280 if (size < 2) {
3281 tcg_temp_free_i32(a);
3282 tcg_temp_free_i32(b);
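/*
 * The (size << 1) | u switch above selects the widening multiply: e.g.
 * size == 1, u == 0 is case 2, a signed 16 x 16 -> 32 multiply per
 * element, while size == 2 uses a full 32 x 32 -> 64 multiply built from
 * gen_muls_i64_i32 / gen_mulu_i64_i32.
 */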
3286 static void gen_neon_narrow_op(int op, int u, int size,
3287 TCGv_i32 dest, TCGv_i64 src)
3289 if (op) {
3290 if (u) {
3291 gen_neon_unarrow_sats(size, dest, src);
3292 } else {
3293 gen_neon_narrow(size, dest, src);
3295 } else {
3296 if (u) {
3297 gen_neon_narrow_satu(size, dest, src);
3298 } else {
3299 gen_neon_narrow_sats(size, dest, src);
3304 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3305 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3306 * table A7-13.
3308 #define NEON_2RM_VREV64 0
3309 #define NEON_2RM_VREV32 1
3310 #define NEON_2RM_VREV16 2
3311 #define NEON_2RM_VPADDL 4
3312 #define NEON_2RM_VPADDL_U 5
3313 #define NEON_2RM_AESE 6 /* Includes AESD */
3314 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3315 #define NEON_2RM_VCLS 8
3316 #define NEON_2RM_VCLZ 9
3317 #define NEON_2RM_VCNT 10
3318 #define NEON_2RM_VMVN 11
3319 #define NEON_2RM_VPADAL 12
3320 #define NEON_2RM_VPADAL_U 13
3321 #define NEON_2RM_VQABS 14
3322 #define NEON_2RM_VQNEG 15
3323 #define NEON_2RM_VCGT0 16
3324 #define NEON_2RM_VCGE0 17
3325 #define NEON_2RM_VCEQ0 18
3326 #define NEON_2RM_VCLE0 19
3327 #define NEON_2RM_VCLT0 20
3328 #define NEON_2RM_SHA1H 21
3329 #define NEON_2RM_VABS 22
3330 #define NEON_2RM_VNEG 23
3331 #define NEON_2RM_VCGT0_F 24
3332 #define NEON_2RM_VCGE0_F 25
3333 #define NEON_2RM_VCEQ0_F 26
3334 #define NEON_2RM_VCLE0_F 27
3335 #define NEON_2RM_VCLT0_F 28
3336 #define NEON_2RM_VABS_F 30
3337 #define NEON_2RM_VNEG_F 31
3338 #define NEON_2RM_VSWP 32
3339 #define NEON_2RM_VTRN 33
3340 #define NEON_2RM_VUZP 34
3341 #define NEON_2RM_VZIP 35
3342 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3343 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3344 #define NEON_2RM_VSHLL 38
3345 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3346 #define NEON_2RM_VRINTN 40
3347 #define NEON_2RM_VRINTX 41
3348 #define NEON_2RM_VRINTA 42
3349 #define NEON_2RM_VRINTZ 43
3350 #define NEON_2RM_VCVT_F16_F32 44
3351 #define NEON_2RM_VRINTM 45
3352 #define NEON_2RM_VCVT_F32_F16 46
3353 #define NEON_2RM_VRINTP 47
3354 #define NEON_2RM_VCVTAU 48
3355 #define NEON_2RM_VCVTAS 49
3356 #define NEON_2RM_VCVTNU 50
3357 #define NEON_2RM_VCVTNS 51
3358 #define NEON_2RM_VCVTPU 52
3359 #define NEON_2RM_VCVTPS 53
3360 #define NEON_2RM_VCVTMU 54
3361 #define NEON_2RM_VCVTMS 55
3362 #define NEON_2RM_VRECPE 56
3363 #define NEON_2RM_VRSQRTE 57
3364 #define NEON_2RM_VRECPE_F 58
3365 #define NEON_2RM_VRSQRTE_F 59
3366 #define NEON_2RM_VCVT_FS 60
3367 #define NEON_2RM_VCVT_FU 61
3368 #define NEON_2RM_VCVT_SF 62
3369 #define NEON_2RM_VCVT_UF 63
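/*
 * For instance NEON_2RM_VSHLL == 38 == 0b100110, i.e. insn bits
 * [17:16] = 0b10 and bits [10:7] = 0b0110 in the 2-reg-misc encoding
 * described above.
 */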
3371 static bool neon_2rm_is_v8_op(int op)
3373 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3374 switch (op) {
3375 case NEON_2RM_VRINTN:
3376 case NEON_2RM_VRINTA:
3377 case NEON_2RM_VRINTM:
3378 case NEON_2RM_VRINTP:
3379 case NEON_2RM_VRINTZ:
3380 case NEON_2RM_VRINTX:
3381 case NEON_2RM_VCVTAU:
3382 case NEON_2RM_VCVTAS:
3383 case NEON_2RM_VCVTNU:
3384 case NEON_2RM_VCVTNS:
3385 case NEON_2RM_VCVTPU:
3386 case NEON_2RM_VCVTPS:
3387 case NEON_2RM_VCVTMU:
3388 case NEON_2RM_VCVTMS:
3389 return true;
3390 default:
3391 return false;
3395 /* Each entry in this array has bit n set if the insn allows
3396 * size value n (otherwise it will UNDEF). Since unallocated
3397 * op values will have no bits set they always UNDEF.
3399 static const uint8_t neon_2rm_sizes[] = {
3400 [NEON_2RM_VREV64] = 0x7,
3401 [NEON_2RM_VREV32] = 0x3,
3402 [NEON_2RM_VREV16] = 0x1,
3403 [NEON_2RM_VPADDL] = 0x7,
3404 [NEON_2RM_VPADDL_U] = 0x7,
3405 [NEON_2RM_AESE] = 0x1,
3406 [NEON_2RM_AESMC] = 0x1,
3407 [NEON_2RM_VCLS] = 0x7,
3408 [NEON_2RM_VCLZ] = 0x7,
3409 [NEON_2RM_VCNT] = 0x1,
3410 [NEON_2RM_VMVN] = 0x1,
3411 [NEON_2RM_VPADAL] = 0x7,
3412 [NEON_2RM_VPADAL_U] = 0x7,
3413 [NEON_2RM_VQABS] = 0x7,
3414 [NEON_2RM_VQNEG] = 0x7,
3415 [NEON_2RM_VCGT0] = 0x7,
3416 [NEON_2RM_VCGE0] = 0x7,
3417 [NEON_2RM_VCEQ0] = 0x7,
3418 [NEON_2RM_VCLE0] = 0x7,
3419 [NEON_2RM_VCLT0] = 0x7,
3420 [NEON_2RM_SHA1H] = 0x4,
3421 [NEON_2RM_VABS] = 0x7,
3422 [NEON_2RM_VNEG] = 0x7,
3423 [NEON_2RM_VCGT0_F] = 0x4,
3424 [NEON_2RM_VCGE0_F] = 0x4,
3425 [NEON_2RM_VCEQ0_F] = 0x4,
3426 [NEON_2RM_VCLE0_F] = 0x4,
3427 [NEON_2RM_VCLT0_F] = 0x4,
3428 [NEON_2RM_VABS_F] = 0x4,
3429 [NEON_2RM_VNEG_F] = 0x4,
3430 [NEON_2RM_VSWP] = 0x1,
3431 [NEON_2RM_VTRN] = 0x7,
3432 [NEON_2RM_VUZP] = 0x7,
3433 [NEON_2RM_VZIP] = 0x7,
3434 [NEON_2RM_VMOVN] = 0x7,
3435 [NEON_2RM_VQMOVN] = 0x7,
3436 [NEON_2RM_VSHLL] = 0x7,
3437 [NEON_2RM_SHA1SU1] = 0x4,
3438 [NEON_2RM_VRINTN] = 0x4,
3439 [NEON_2RM_VRINTX] = 0x4,
3440 [NEON_2RM_VRINTA] = 0x4,
3441 [NEON_2RM_VRINTZ] = 0x4,
3442 [NEON_2RM_VCVT_F16_F32] = 0x2,
3443 [NEON_2RM_VRINTM] = 0x4,
3444 [NEON_2RM_VCVT_F32_F16] = 0x2,
3445 [NEON_2RM_VRINTP] = 0x4,
3446 [NEON_2RM_VCVTAU] = 0x4,
3447 [NEON_2RM_VCVTAS] = 0x4,
3448 [NEON_2RM_VCVTNU] = 0x4,
3449 [NEON_2RM_VCVTNS] = 0x4,
3450 [NEON_2RM_VCVTPU] = 0x4,
3451 [NEON_2RM_VCVTPS] = 0x4,
3452 [NEON_2RM_VCVTMU] = 0x4,
3453 [NEON_2RM_VCVTMS] = 0x4,
3454 [NEON_2RM_VRECPE] = 0x4,
3455 [NEON_2RM_VRSQRTE] = 0x4,
3456 [NEON_2RM_VRECPE_F] = 0x4,
3457 [NEON_2RM_VRSQRTE_F] = 0x4,
3458 [NEON_2RM_VCVT_FS] = 0x4,
3459 [NEON_2RM_VCVT_FU] = 0x4,
3460 [NEON_2RM_VCVT_SF] = 0x4,
3461 [NEON_2RM_VCVT_UF] = 0x4,
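/*
 * Example: [NEON_2RM_VREV32] = 0x3 allows size 0 and 1 (byte and halfword
 * elements) and UNDEFs for size 2, while entries of 0x4 (e.g. the
 * VRINT/VCVT group) accept only size 2, i.e. 32-bit elements.
 */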
3464 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3465 uint32_t opr_sz, uint32_t max_sz,
3466 gen_helper_gvec_3_ptr *fn)
3468 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3470 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3471 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3472 opr_sz, max_sz, 0, fn);
3473 tcg_temp_free_ptr(qc_ptr);
3476 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3477 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3479 static gen_helper_gvec_3_ptr * const fns[2] = {
3480 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3482 tcg_debug_assert(vece >= 1 && vece <= 2);
3483 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3486 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3487 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3489 static gen_helper_gvec_3_ptr * const fns[2] = {
3490 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3492 tcg_debug_assert(vece >= 1 && vece <= 2);
3493 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3496 #define GEN_CMP0(NAME, COND) \
3497 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3499 tcg_gen_setcondi_i32(COND, d, a, 0); \
3500 tcg_gen_neg_i32(d, d); \
3502 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3504 tcg_gen_setcondi_i64(COND, d, a, 0); \
3505 tcg_gen_neg_i64(d, d); \
3507 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3509 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3510 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3511 tcg_temp_free_vec(zero); \
3513 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3514 uint32_t opr_sz, uint32_t max_sz) \
3516 const GVecGen2 op[4] = { \
3517 { .fno = gen_helper_gvec_##NAME##0_b, \
3518 .fniv = gen_##NAME##0_vec, \
3519 .opt_opc = vecop_list_cmp, \
3520 .vece = MO_8 }, \
3521 { .fno = gen_helper_gvec_##NAME##0_h, \
3522 .fniv = gen_##NAME##0_vec, \
3523 .opt_opc = vecop_list_cmp, \
3524 .vece = MO_16 }, \
3525 { .fni4 = gen_##NAME##0_i32, \
3526 .fniv = gen_##NAME##0_vec, \
3527 .opt_opc = vecop_list_cmp, \
3528 .vece = MO_32 }, \
3529 { .fni8 = gen_##NAME##0_i64, \
3530 .fniv = gen_##NAME##0_vec, \
3531 .opt_opc = vecop_list_cmp, \
3532 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3533 .vece = MO_64 }, \
3534 }; \
3535 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3538 static const TCGOpcode vecop_list_cmp[] = {
3539 INDEX_op_cmp_vec, 0
3542 GEN_CMP0(ceq, TCG_COND_EQ)
3543 GEN_CMP0(cle, TCG_COND_LE)
3544 GEN_CMP0(cge, TCG_COND_GE)
3545 GEN_CMP0(clt, TCG_COND_LT)
3546 GEN_CMP0(cgt, TCG_COND_GT)
3548 #undef GEN_CMP0
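/*
 * Each GEN_CMP0 expansion provides i32, i64 and vector versions of a
 * compare-against-zero that yields an all-ones mask when the condition
 * holds: setcond produces 0 or 1 and the negation turns that into 0 or -1,
 * matching the results of the out-of-line gen_helper_gvec_*0_* functions.
 */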
3550 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3552 tcg_gen_vec_sar8i_i64(a, a, shift);
3553 tcg_gen_vec_add8_i64(d, d, a);
3556 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3558 tcg_gen_vec_sar16i_i64(a, a, shift);
3559 tcg_gen_vec_add16_i64(d, d, a);
3562 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3564 tcg_gen_sari_i32(a, a, shift);
3565 tcg_gen_add_i32(d, d, a);
3568 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3570 tcg_gen_sari_i64(a, a, shift);
3571 tcg_gen_add_i64(d, d, a);
3574 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3576 tcg_gen_sari_vec(vece, a, a, sh);
3577 tcg_gen_add_vec(vece, d, d, a);
3580 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3581 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3583 static const TCGOpcode vecop_list[] = {
3584 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3586 static const GVecGen2i ops[4] = {
3587 { .fni8 = gen_ssra8_i64,
3588 .fniv = gen_ssra_vec,
3589 .fno = gen_helper_gvec_ssra_b,
3590 .load_dest = true,
3591 .opt_opc = vecop_list,
3592 .vece = MO_8 },
3593 { .fni8 = gen_ssra16_i64,
3594 .fniv = gen_ssra_vec,
3595 .fno = gen_helper_gvec_ssra_h,
3596 .load_dest = true,
3597 .opt_opc = vecop_list,
3598 .vece = MO_16 },
3599 { .fni4 = gen_ssra32_i32,
3600 .fniv = gen_ssra_vec,
3601 .fno = gen_helper_gvec_ssra_s,
3602 .load_dest = true,
3603 .opt_opc = vecop_list,
3604 .vece = MO_32 },
3605 { .fni8 = gen_ssra64_i64,
3606 .fniv = gen_ssra_vec,
3607       .fno = gen_helper_gvec_ssra_d,
3608 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3609 .opt_opc = vecop_list,
3610 .load_dest = true,
3611 .vece = MO_64 },
3614 /* tszimm encoding produces immediates in the range [1..esize]. */
3615 tcg_debug_assert(shift > 0);
3616 tcg_debug_assert(shift <= (8 << vece));
3619 * Shifts larger than the element size are architecturally valid.
3620      * A signed shift of that size produces all sign bits, so clamp to esize - 1.
3622 shift = MIN(shift, (8 << vece) - 1);
3623 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3626 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3628 tcg_gen_vec_shr8i_i64(a, a, shift);
3629 tcg_gen_vec_add8_i64(d, d, a);
3632 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3634 tcg_gen_vec_shr16i_i64(a, a, shift);
3635 tcg_gen_vec_add16_i64(d, d, a);
3638 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3640 tcg_gen_shri_i32(a, a, shift);
3641 tcg_gen_add_i32(d, d, a);
3644 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3646 tcg_gen_shri_i64(a, a, shift);
3647 tcg_gen_add_i64(d, d, a);
3650 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3652 tcg_gen_shri_vec(vece, a, a, sh);
3653 tcg_gen_add_vec(vece, d, d, a);
3656 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3657 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3659 static const TCGOpcode vecop_list[] = {
3660 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3662 static const GVecGen2i ops[4] = {
3663 { .fni8 = gen_usra8_i64,
3664 .fniv = gen_usra_vec,
3665 .fno = gen_helper_gvec_usra_b,
3666 .load_dest = true,
3667 .opt_opc = vecop_list,
3668 .vece = MO_8, },
3669 { .fni8 = gen_usra16_i64,
3670 .fniv = gen_usra_vec,
3671 .fno = gen_helper_gvec_usra_h,
3672 .load_dest = true,
3673 .opt_opc = vecop_list,
3674 .vece = MO_16, },
3675 { .fni4 = gen_usra32_i32,
3676 .fniv = gen_usra_vec,
3677 .fno = gen_helper_gvec_usra_s,
3678 .load_dest = true,
3679 .opt_opc = vecop_list,
3680 .vece = MO_32, },
3681 { .fni8 = gen_usra64_i64,
3682 .fniv = gen_usra_vec,
3683 .fno = gen_helper_gvec_usra_d,
3684 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3685 .load_dest = true,
3686 .opt_opc = vecop_list,
3687 .vece = MO_64, },
3690 /* tszimm encoding produces immediates in the range [1..esize]. */
3691 tcg_debug_assert(shift > 0);
3692 tcg_debug_assert(shift <= (8 << vece));
3695 * Shifts larger than the element size are architecturally valid.
3696      * An unsigned shift of that size produces all zeros, so the accumulate is a nop.
3698 if (shift < (8 << vece)) {
3699 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3700 } else {
3701 /* Nop, but we do need to clear the tail. */
3702 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3707 * Shift one less than the requested amount, and the low bit is
3708 * the rounding bit. For the 8 and 16-bit operations, because we
3709 * mask the low bit, we can perform a normal integer shift instead
3710 * of a vector shift.
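 * In other words d = (a + (1 << (sh - 1))) >> sh without risking overflow
 * in the addition: e.g. URSHR of 6 by 2 computes a rounding bit of
 * (6 >> 1) & 1 = 1 and a shifted value of 6 >> 2 = 1, giving 2,
 * i.e. 6/4 rounded to nearest.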
3712 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3714 TCGv_i64 t = tcg_temp_new_i64();
3716 tcg_gen_shri_i64(t, a, sh - 1);
3717 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3718 tcg_gen_vec_sar8i_i64(d, a, sh);
3719 tcg_gen_vec_add8_i64(d, d, t);
3720 tcg_temp_free_i64(t);
3723 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3725 TCGv_i64 t = tcg_temp_new_i64();
3727 tcg_gen_shri_i64(t, a, sh - 1);
3728 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3729 tcg_gen_vec_sar16i_i64(d, a, sh);
3730 tcg_gen_vec_add16_i64(d, d, t);
3731 tcg_temp_free_i64(t);
3734 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3736 TCGv_i32 t = tcg_temp_new_i32();
3738 tcg_gen_extract_i32(t, a, sh - 1, 1);
3739 tcg_gen_sari_i32(d, a, sh);
3740 tcg_gen_add_i32(d, d, t);
3741 tcg_temp_free_i32(t);
3744 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3746 TCGv_i64 t = tcg_temp_new_i64();
3748 tcg_gen_extract_i64(t, a, sh - 1, 1);
3749 tcg_gen_sari_i64(d, a, sh);
3750 tcg_gen_add_i64(d, d, t);
3751 tcg_temp_free_i64(t);
3754 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3756 TCGv_vec t = tcg_temp_new_vec_matching(d);
3757 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3759 tcg_gen_shri_vec(vece, t, a, sh - 1);
3760 tcg_gen_dupi_vec(vece, ones, 1);
3761 tcg_gen_and_vec(vece, t, t, ones);
3762 tcg_gen_sari_vec(vece, d, a, sh);
3763 tcg_gen_add_vec(vece, d, d, t);
3765 tcg_temp_free_vec(t);
3766 tcg_temp_free_vec(ones);
3769 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3770 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3772 static const TCGOpcode vecop_list[] = {
3773 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3775 static const GVecGen2i ops[4] = {
3776 { .fni8 = gen_srshr8_i64,
3777 .fniv = gen_srshr_vec,
3778 .fno = gen_helper_gvec_srshr_b,
3779 .opt_opc = vecop_list,
3780 .vece = MO_8 },
3781 { .fni8 = gen_srshr16_i64,
3782 .fniv = gen_srshr_vec,
3783 .fno = gen_helper_gvec_srshr_h,
3784 .opt_opc = vecop_list,
3785 .vece = MO_16 },
3786 { .fni4 = gen_srshr32_i32,
3787 .fniv = gen_srshr_vec,
3788 .fno = gen_helper_gvec_srshr_s,
3789 .opt_opc = vecop_list,
3790 .vece = MO_32 },
3791 { .fni8 = gen_srshr64_i64,
3792 .fniv = gen_srshr_vec,
3793 .fno = gen_helper_gvec_srshr_d,
3794 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3795 .opt_opc = vecop_list,
3796 .vece = MO_64 },
3799 /* tszimm encoding produces immediates in the range [1..esize] */
3800 tcg_debug_assert(shift > 0);
3801 tcg_debug_assert(shift <= (8 << vece));
3803 if (shift == (8 << vece)) {
3805 * Shifts larger than the element size are architecturally valid.
3806 * Signed results in all sign bits. With rounding, this produces
3807 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3808 * I.e. always zero.
3810 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3811 } else {
3812 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3816 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3818 TCGv_i64 t = tcg_temp_new_i64();
3820 gen_srshr8_i64(t, a, sh);
3821 tcg_gen_vec_add8_i64(d, d, t);
3822 tcg_temp_free_i64(t);
3825 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3827 TCGv_i64 t = tcg_temp_new_i64();
3829 gen_srshr16_i64(t, a, sh);
3830 tcg_gen_vec_add16_i64(d, d, t);
3831 tcg_temp_free_i64(t);
3834 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3836 TCGv_i32 t = tcg_temp_new_i32();
3838 gen_srshr32_i32(t, a, sh);
3839 tcg_gen_add_i32(d, d, t);
3840 tcg_temp_free_i32(t);
3843 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3845 TCGv_i64 t = tcg_temp_new_i64();
3847 gen_srshr64_i64(t, a, sh);
3848 tcg_gen_add_i64(d, d, t);
3849 tcg_temp_free_i64(t);
3852 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3854 TCGv_vec t = tcg_temp_new_vec_matching(d);
3856 gen_srshr_vec(vece, t, a, sh);
3857 tcg_gen_add_vec(vece, d, d, t);
3858 tcg_temp_free_vec(t);
3861 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3862 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3864 static const TCGOpcode vecop_list[] = {
3865 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3867 static const GVecGen2i ops[4] = {
3868 { .fni8 = gen_srsra8_i64,
3869 .fniv = gen_srsra_vec,
3870 .fno = gen_helper_gvec_srsra_b,
3871 .opt_opc = vecop_list,
3872 .load_dest = true,
3873 .vece = MO_8 },
3874 { .fni8 = gen_srsra16_i64,
3875 .fniv = gen_srsra_vec,
3876 .fno = gen_helper_gvec_srsra_h,
3877 .opt_opc = vecop_list,
3878 .load_dest = true,
3879 .vece = MO_16 },
3880 { .fni4 = gen_srsra32_i32,
3881 .fniv = gen_srsra_vec,
3882 .fno = gen_helper_gvec_srsra_s,
3883 .opt_opc = vecop_list,
3884 .load_dest = true,
3885 .vece = MO_32 },
3886 { .fni8 = gen_srsra64_i64,
3887 .fniv = gen_srsra_vec,
3888 .fno = gen_helper_gvec_srsra_d,
3889 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3890 .opt_opc = vecop_list,
3891 .load_dest = true,
3892 .vece = MO_64 },
3895 /* tszimm encoding produces immediates in the range [1..esize] */
3896 tcg_debug_assert(shift > 0);
3897 tcg_debug_assert(shift <= (8 << vece));
3900 * Shifts larger than the element size are architecturally valid.
3901 * Signed results in all sign bits. With rounding, this produces
3902 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3903 * I.e. always zero. With accumulation, this leaves D unchanged.
3905 if (shift == (8 << vece)) {
3906 /* Nop, but we do need to clear the tail. */
3907 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3908 } else {
3909 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3913 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3915 TCGv_i64 t = tcg_temp_new_i64();
3917 tcg_gen_shri_i64(t, a, sh - 1);
3918 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3919 tcg_gen_vec_shr8i_i64(d, a, sh);
3920 tcg_gen_vec_add8_i64(d, d, t);
3921 tcg_temp_free_i64(t);
3924 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3926 TCGv_i64 t = tcg_temp_new_i64();
3928 tcg_gen_shri_i64(t, a, sh - 1);
3929 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3930 tcg_gen_vec_shr16i_i64(d, a, sh);
3931 tcg_gen_vec_add16_i64(d, d, t);
3932 tcg_temp_free_i64(t);
3935 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3937 TCGv_i32 t = tcg_temp_new_i32();
3939 tcg_gen_extract_i32(t, a, sh - 1, 1);
3940 tcg_gen_shri_i32(d, a, sh);
3941 tcg_gen_add_i32(d, d, t);
3942 tcg_temp_free_i32(t);
3945 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3947 TCGv_i64 t = tcg_temp_new_i64();
3949 tcg_gen_extract_i64(t, a, sh - 1, 1);
3950 tcg_gen_shri_i64(d, a, sh);
3951 tcg_gen_add_i64(d, d, t);
3952 tcg_temp_free_i64(t);
3955 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3957 TCGv_vec t = tcg_temp_new_vec_matching(d);
3958 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3960 tcg_gen_shri_vec(vece, t, a, shift - 1);
3961 tcg_gen_dupi_vec(vece, ones, 1);
3962 tcg_gen_and_vec(vece, t, t, ones);
3963 tcg_gen_shri_vec(vece, d, a, shift);
3964 tcg_gen_add_vec(vece, d, d, t);
3966 tcg_temp_free_vec(t);
3967 tcg_temp_free_vec(ones);
3970 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3971 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3973 static const TCGOpcode vecop_list[] = {
3974 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3976 static const GVecGen2i ops[4] = {
3977 { .fni8 = gen_urshr8_i64,
3978 .fniv = gen_urshr_vec,
3979 .fno = gen_helper_gvec_urshr_b,
3980 .opt_opc = vecop_list,
3981 .vece = MO_8 },
3982 { .fni8 = gen_urshr16_i64,
3983 .fniv = gen_urshr_vec,
3984 .fno = gen_helper_gvec_urshr_h,
3985 .opt_opc = vecop_list,
3986 .vece = MO_16 },
3987 { .fni4 = gen_urshr32_i32,
3988 .fniv = gen_urshr_vec,
3989 .fno = gen_helper_gvec_urshr_s,
3990 .opt_opc = vecop_list,
3991 .vece = MO_32 },
3992 { .fni8 = gen_urshr64_i64,
3993 .fniv = gen_urshr_vec,
3994 .fno = gen_helper_gvec_urshr_d,
3995 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3996 .opt_opc = vecop_list,
3997 .vece = MO_64 },
4000 /* tszimm encoding produces immediates in the range [1..esize] */
4001 tcg_debug_assert(shift > 0);
4002 tcg_debug_assert(shift <= (8 << vece));
4004 if (shift == (8 << vece)) {
4006 * Shifts larger than the element size are architecturally valid.
4007 * Unsigned results in zero. With rounding, this produces a
4008 * copy of the most significant bit.
4010 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4011 } else {
4012 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4016 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4018 TCGv_i64 t = tcg_temp_new_i64();
4020 if (sh == 8) {
4021 tcg_gen_vec_shr8i_i64(t, a, 7);
4022 } else {
4023 gen_urshr8_i64(t, a, sh);
4025 tcg_gen_vec_add8_i64(d, d, t);
4026 tcg_temp_free_i64(t);
4029 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4031 TCGv_i64 t = tcg_temp_new_i64();
4033 if (sh == 16) {
4034 tcg_gen_vec_shr16i_i64(t, a, 15);
4035 } else {
4036 gen_urshr16_i64(t, a, sh);
4038 tcg_gen_vec_add16_i64(d, d, t);
4039 tcg_temp_free_i64(t);
4042 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4044 TCGv_i32 t = tcg_temp_new_i32();
4046 if (sh == 32) {
4047 tcg_gen_shri_i32(t, a, 31);
4048 } else {
4049 gen_urshr32_i32(t, a, sh);
4051 tcg_gen_add_i32(d, d, t);
4052 tcg_temp_free_i32(t);
4055 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4057 TCGv_i64 t = tcg_temp_new_i64();
4059 if (sh == 64) {
4060 tcg_gen_shri_i64(t, a, 63);
4061 } else {
4062 gen_urshr64_i64(t, a, sh);
4064 tcg_gen_add_i64(d, d, t);
4065 tcg_temp_free_i64(t);
4068 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4070 TCGv_vec t = tcg_temp_new_vec_matching(d);
4072 if (sh == (8 << vece)) {
4073 tcg_gen_shri_vec(vece, t, a, sh - 1);
4074 } else {
4075 gen_urshr_vec(vece, t, a, sh);
4077 tcg_gen_add_vec(vece, d, d, t);
4078 tcg_temp_free_vec(t);
4081 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4082 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4084 static const TCGOpcode vecop_list[] = {
4085 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4087 static const GVecGen2i ops[4] = {
4088 { .fni8 = gen_ursra8_i64,
4089 .fniv = gen_ursra_vec,
4090 .fno = gen_helper_gvec_ursra_b,
4091 .opt_opc = vecop_list,
4092 .load_dest = true,
4093 .vece = MO_8 },
4094 { .fni8 = gen_ursra16_i64,
4095 .fniv = gen_ursra_vec,
4096 .fno = gen_helper_gvec_ursra_h,
4097 .opt_opc = vecop_list,
4098 .load_dest = true,
4099 .vece = MO_16 },
4100 { .fni4 = gen_ursra32_i32,
4101 .fniv = gen_ursra_vec,
4102 .fno = gen_helper_gvec_ursra_s,
4103 .opt_opc = vecop_list,
4104 .load_dest = true,
4105 .vece = MO_32 },
4106 { .fni8 = gen_ursra64_i64,
4107 .fniv = gen_ursra_vec,
4108 .fno = gen_helper_gvec_ursra_d,
4109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4110 .opt_opc = vecop_list,
4111 .load_dest = true,
4112 .vece = MO_64 },
4115 /* tszimm encoding produces immediates in the range [1..esize] */
4116 tcg_debug_assert(shift > 0);
4117 tcg_debug_assert(shift <= (8 << vece));
4119 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
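/*
 * Shift right and insert: the low (esize - shift) bits of each
 * destination lane are replaced with a >> shift, while the top 'shift'
 * bits of the destination are preserved.  The 8- and 16-bit lanes do
 * this with a replicated mask (dup_const) on a 64-bit temporary; the
 * 32- and 64-bit lanes can simply use deposit.
 */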
4122 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4124 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4125 TCGv_i64 t = tcg_temp_new_i64();
4127 tcg_gen_shri_i64(t, a, shift);
4128 tcg_gen_andi_i64(t, t, mask);
4129 tcg_gen_andi_i64(d, d, ~mask);
4130 tcg_gen_or_i64(d, d, t);
4131 tcg_temp_free_i64(t);
4134 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4136 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4137 TCGv_i64 t = tcg_temp_new_i64();
4139 tcg_gen_shri_i64(t, a, shift);
4140 tcg_gen_andi_i64(t, t, mask);
4141 tcg_gen_andi_i64(d, d, ~mask);
4142 tcg_gen_or_i64(d, d, t);
4143 tcg_temp_free_i64(t);
4146 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4148 tcg_gen_shri_i32(a, a, shift);
4149 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4152 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4154 tcg_gen_shri_i64(a, a, shift);
4155 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4158 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4160 TCGv_vec t = tcg_temp_new_vec_matching(d);
4161 TCGv_vec m = tcg_temp_new_vec_matching(d);
4163 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4164 tcg_gen_shri_vec(vece, t, a, sh);
4165 tcg_gen_and_vec(vece, d, d, m);
4166 tcg_gen_or_vec(vece, d, d, t);
4168 tcg_temp_free_vec(t);
4169 tcg_temp_free_vec(m);
4172 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4173 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4175 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4176 const GVecGen2i ops[4] = {
4177 { .fni8 = gen_shr8_ins_i64,
4178 .fniv = gen_shr_ins_vec,
4179 .fno = gen_helper_gvec_sri_b,
4180 .load_dest = true,
4181 .opt_opc = vecop_list,
4182 .vece = MO_8 },
4183 { .fni8 = gen_shr16_ins_i64,
4184 .fniv = gen_shr_ins_vec,
4185 .fno = gen_helper_gvec_sri_h,
4186 .load_dest = true,
4187 .opt_opc = vecop_list,
4188 .vece = MO_16 },
4189 { .fni4 = gen_shr32_ins_i32,
4190 .fniv = gen_shr_ins_vec,
4191 .fno = gen_helper_gvec_sri_s,
4192 .load_dest = true,
4193 .opt_opc = vecop_list,
4194 .vece = MO_32 },
4195 { .fni8 = gen_shr64_ins_i64,
4196 .fniv = gen_shr_ins_vec,
4197 .fno = gen_helper_gvec_sri_d,
4198 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4199 .load_dest = true,
4200 .opt_opc = vecop_list,
4201 .vece = MO_64 },
4204 /* tszimm encoding produces immediates in the range [1..esize]. */
4205 tcg_debug_assert(shift > 0);
4206 tcg_debug_assert(shift <= (8 << vece));
4208 /* Shift of esize leaves destination unchanged. */
4209 if (shift < (8 << vece)) {
4210 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4211 } else {
4212 /* Nop, but we do need to clear the tail. */
4213 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
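/*
 * Shift left and insert is the mirror image: a << shift replaces the
 * top (esize - shift) bits of each destination lane and the low
 * 'shift' bits of the destination are preserved, again via replicated
 * masks for the narrow lanes and deposit for the 32- and 64-bit ones.
 */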
4217 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4219 uint64_t mask = dup_const(MO_8, 0xff << shift);
4220 TCGv_i64 t = tcg_temp_new_i64();
4222 tcg_gen_shli_i64(t, a, shift);
4223 tcg_gen_andi_i64(t, t, mask);
4224 tcg_gen_andi_i64(d, d, ~mask);
4225 tcg_gen_or_i64(d, d, t);
4226 tcg_temp_free_i64(t);
4229 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4231 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4232 TCGv_i64 t = tcg_temp_new_i64();
4234 tcg_gen_shli_i64(t, a, shift);
4235 tcg_gen_andi_i64(t, t, mask);
4236 tcg_gen_andi_i64(d, d, ~mask);
4237 tcg_gen_or_i64(d, d, t);
4238 tcg_temp_free_i64(t);
4241 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4243 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4246 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4248 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4251 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4253 TCGv_vec t = tcg_temp_new_vec_matching(d);
4254 TCGv_vec m = tcg_temp_new_vec_matching(d);
4256 tcg_gen_shli_vec(vece, t, a, sh);
4257 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4258 tcg_gen_and_vec(vece, d, d, m);
4259 tcg_gen_or_vec(vece, d, d, t);
4261 tcg_temp_free_vec(t);
4262 tcg_temp_free_vec(m);
4265 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4266 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4268 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4269 const GVecGen2i ops[4] = {
4270 { .fni8 = gen_shl8_ins_i64,
4271 .fniv = gen_shl_ins_vec,
4272 .fno = gen_helper_gvec_sli_b,
4273 .load_dest = true,
4274 .opt_opc = vecop_list,
4275 .vece = MO_8 },
4276 { .fni8 = gen_shl16_ins_i64,
4277 .fniv = gen_shl_ins_vec,
4278 .fno = gen_helper_gvec_sli_h,
4279 .load_dest = true,
4280 .opt_opc = vecop_list,
4281 .vece = MO_16 },
4282 { .fni4 = gen_shl32_ins_i32,
4283 .fniv = gen_shl_ins_vec,
4284 .fno = gen_helper_gvec_sli_s,
4285 .load_dest = true,
4286 .opt_opc = vecop_list,
4287 .vece = MO_32 },
4288 { .fni8 = gen_shl64_ins_i64,
4289 .fniv = gen_shl_ins_vec,
4290 .fno = gen_helper_gvec_sli_d,
4291 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4292 .load_dest = true,
4293 .opt_opc = vecop_list,
4294 .vece = MO_64 },
4297 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4298 tcg_debug_assert(shift >= 0);
4299 tcg_debug_assert(shift < (8 << vece));
4301 if (shift == 0) {
4302 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4303 } else {
4304 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
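/*
 * Multiply-accumulate and multiply-subtract: d = d +/- (a * b).
 * load_dest is set in the tables below so that 'd' arrives holding
 * the previous destination contents; the product is formed in place
 * in 'a' to avoid an extra temporary.
 */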
4308 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4310 gen_helper_neon_mul_u8(a, a, b);
4311 gen_helper_neon_add_u8(d, d, a);
4314 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4316 gen_helper_neon_mul_u8(a, a, b);
4317 gen_helper_neon_sub_u8(d, d, a);
4320 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4322 gen_helper_neon_mul_u16(a, a, b);
4323 gen_helper_neon_add_u16(d, d, a);
4326 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4328 gen_helper_neon_mul_u16(a, a, b);
4329 gen_helper_neon_sub_u16(d, d, a);
4332 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4334 tcg_gen_mul_i32(a, a, b);
4335 tcg_gen_add_i32(d, d, a);
4338 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4340 tcg_gen_mul_i32(a, a, b);
4341 tcg_gen_sub_i32(d, d, a);
4344 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4346 tcg_gen_mul_i64(a, a, b);
4347 tcg_gen_add_i64(d, d, a);
4350 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4352 tcg_gen_mul_i64(a, a, b);
4353 tcg_gen_sub_i64(d, d, a);
4356 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4358 tcg_gen_mul_vec(vece, a, a, b);
4359 tcg_gen_add_vec(vece, d, d, a);
4362 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4364 tcg_gen_mul_vec(vece, a, a, b);
4365 tcg_gen_sub_vec(vece, d, d, a);
4368 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4369 * these tables are shared with AArch64, which does support them.
4371 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4372 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4374 static const TCGOpcode vecop_list[] = {
4375 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4377 static const GVecGen3 ops[4] = {
4378 { .fni4 = gen_mla8_i32,
4379 .fniv = gen_mla_vec,
4380 .load_dest = true,
4381 .opt_opc = vecop_list,
4382 .vece = MO_8 },
4383 { .fni4 = gen_mla16_i32,
4384 .fniv = gen_mla_vec,
4385 .load_dest = true,
4386 .opt_opc = vecop_list,
4387 .vece = MO_16 },
4388 { .fni4 = gen_mla32_i32,
4389 .fniv = gen_mla_vec,
4390 .load_dest = true,
4391 .opt_opc = vecop_list,
4392 .vece = MO_32 },
4393 { .fni8 = gen_mla64_i64,
4394 .fniv = gen_mla_vec,
4395 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4396 .load_dest = true,
4397 .opt_opc = vecop_list,
4398 .vece = MO_64 },
4400 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4403 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4404 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4406 static const TCGOpcode vecop_list[] = {
4407 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4409 static const GVecGen3 ops[4] = {
4410 { .fni4 = gen_mls8_i32,
4411 .fniv = gen_mls_vec,
4412 .load_dest = true,
4413 .opt_opc = vecop_list,
4414 .vece = MO_8 },
4415 { .fni4 = gen_mls16_i32,
4416 .fniv = gen_mls_vec,
4417 .load_dest = true,
4418 .opt_opc = vecop_list,
4419 .vece = MO_16 },
4420 { .fni4 = gen_mls32_i32,
4421 .fniv = gen_mls_vec,
4422 .load_dest = true,
4423 .opt_opc = vecop_list,
4424 .vece = MO_32 },
4425 { .fni8 = gen_mls64_i64,
4426 .fniv = gen_mls_vec,
4427 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4428 .load_dest = true,
4429 .opt_opc = vecop_list,
4430 .vece = MO_64 },
4432 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4435 /* CMTST : test is "if ((X & Y) != 0)". */
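/*
 * In outline: the scalar expansions below produce the required
 * all-ones / all-zeros lane with setcond (0 or 1) followed by neg
 * (0 or -1), while the vector expansion can use cmp_vec against zero
 * directly, since that already yields a full-width mask per lane.
 */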
4436 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4438 tcg_gen_and_i32(d, a, b);
4439 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4440 tcg_gen_neg_i32(d, d);
4443 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4445 tcg_gen_and_i64(d, a, b);
4446 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4447 tcg_gen_neg_i64(d, d);
4450 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4452 tcg_gen_and_vec(vece, d, a, b);
4453 tcg_gen_dupi_vec(vece, a, 0);
4454 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4457 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4458 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4460 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4461 static const GVecGen3 ops[4] = {
4462 { .fni4 = gen_helper_neon_tst_u8,
4463 .fniv = gen_cmtst_vec,
4464 .opt_opc = vecop_list,
4465 .vece = MO_8 },
4466 { .fni4 = gen_helper_neon_tst_u16,
4467 .fniv = gen_cmtst_vec,
4468 .opt_opc = vecop_list,
4469 .vece = MO_16 },
4470 { .fni4 = gen_cmtst_i32,
4471 .fniv = gen_cmtst_vec,
4472 .opt_opc = vecop_list,
4473 .vece = MO_32 },
4474 { .fni8 = gen_cmtst_i64,
4475 .fniv = gen_cmtst_vec,
4476 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4477 .opt_opc = vecop_list,
4478 .vece = MO_64 },
4480 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
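/*
 * USHL: shift by a per-lane register amount.  Only the low byte of
 * the shift operand is used, as a signed value: positive shifts left,
 * negative shifts right, and any magnitude >= esize gives zero.  The
 * expansions compute both candidate results and pick the in-range one
 * with movcond (scalar) or compare-and-mask (vector), relying on
 * TCG's "unspecified but no trap" behaviour for out-of-range counts,
 * as the comments below note.  E.g. src = 0x80, shift = 0xff (-1):
 * the left shift is out of range and discarded, and the right shift
 * by 1 gives 0x40.
 */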
4483 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4485 TCGv_i32 lval = tcg_temp_new_i32();
4486 TCGv_i32 rval = tcg_temp_new_i32();
4487 TCGv_i32 lsh = tcg_temp_new_i32();
4488 TCGv_i32 rsh = tcg_temp_new_i32();
4489 TCGv_i32 zero = tcg_const_i32(0);
4490 TCGv_i32 max = tcg_const_i32(32);
4493 * Rely on the TCG guarantee that out of range shifts produce
4494 * unspecified results, not undefined behaviour (i.e. no trap).
4495 * Discard out-of-range results after the fact.
4497 tcg_gen_ext8s_i32(lsh, shift);
4498 tcg_gen_neg_i32(rsh, lsh);
4499 tcg_gen_shl_i32(lval, src, lsh);
4500 tcg_gen_shr_i32(rval, src, rsh);
4501 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4502 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4504 tcg_temp_free_i32(lval);
4505 tcg_temp_free_i32(rval);
4506 tcg_temp_free_i32(lsh);
4507 tcg_temp_free_i32(rsh);
4508 tcg_temp_free_i32(zero);
4509 tcg_temp_free_i32(max);
4512 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4514 TCGv_i64 lval = tcg_temp_new_i64();
4515 TCGv_i64 rval = tcg_temp_new_i64();
4516 TCGv_i64 lsh = tcg_temp_new_i64();
4517 TCGv_i64 rsh = tcg_temp_new_i64();
4518 TCGv_i64 zero = tcg_const_i64(0);
4519 TCGv_i64 max = tcg_const_i64(64);
4522 * Rely on the TCG guarantee that out of range shifts produce
4523 * unspecified results, not undefined behaviour (i.e. no trap).
4524 * Discard out-of-range results after the fact.
4526 tcg_gen_ext8s_i64(lsh, shift);
4527 tcg_gen_neg_i64(rsh, lsh);
4528 tcg_gen_shl_i64(lval, src, lsh);
4529 tcg_gen_shr_i64(rval, src, rsh);
4530 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4531 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4533 tcg_temp_free_i64(lval);
4534 tcg_temp_free_i64(rval);
4535 tcg_temp_free_i64(lsh);
4536 tcg_temp_free_i64(rsh);
4537 tcg_temp_free_i64(zero);
4538 tcg_temp_free_i64(max);
4541 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4542 TCGv_vec src, TCGv_vec shift)
4544 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4545 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4546 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4547 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4548 TCGv_vec msk, max;
4550 tcg_gen_neg_vec(vece, rsh, shift);
4551 if (vece == MO_8) {
4552 tcg_gen_mov_vec(lsh, shift);
4553 } else {
4554 msk = tcg_temp_new_vec_matching(dst);
4555 tcg_gen_dupi_vec(vece, msk, 0xff);
4556 tcg_gen_and_vec(vece, lsh, shift, msk);
4557 tcg_gen_and_vec(vece, rsh, rsh, msk);
4558 tcg_temp_free_vec(msk);
4562 * Rely on the TCG guarantee that out of range shifts produce
4563 * unspecified results, not undefined behaviour (i.e. no trap).
4564 * Discard out-of-range results after the fact.
4566 tcg_gen_shlv_vec(vece, lval, src, lsh);
4567 tcg_gen_shrv_vec(vece, rval, src, rsh);
4569 max = tcg_temp_new_vec_matching(dst);
4570 tcg_gen_dupi_vec(vece, max, 8 << vece);
4573 * The choice of LT (signed) and GEU (unsigned) are biased toward
4574 * the instructions of the x86_64 host. For MO_8, the whole byte
4575 * is significant so we must use an unsigned compare; otherwise we
4576 * have already masked to a byte and so a signed compare works.
4577 * Other tcg hosts have a full set of comparisons and do not care.
4579 if (vece == MO_8) {
4580 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4581 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4582 tcg_gen_andc_vec(vece, lval, lval, lsh);
4583 tcg_gen_andc_vec(vece, rval, rval, rsh);
4584 } else {
4585 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4586 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4587 tcg_gen_and_vec(vece, lval, lval, lsh);
4588 tcg_gen_and_vec(vece, rval, rval, rsh);
4590 tcg_gen_or_vec(vece, dst, lval, rval);
4592 tcg_temp_free_vec(max);
4593 tcg_temp_free_vec(lval);
4594 tcg_temp_free_vec(rval);
4595 tcg_temp_free_vec(lsh);
4596 tcg_temp_free_vec(rsh);
4599 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4600 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4602 static const TCGOpcode vecop_list[] = {
4603 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4604 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4606 static const GVecGen3 ops[4] = {
4607 { .fniv = gen_ushl_vec,
4608 .fno = gen_helper_gvec_ushl_b,
4609 .opt_opc = vecop_list,
4610 .vece = MO_8 },
4611 { .fniv = gen_ushl_vec,
4612 .fno = gen_helper_gvec_ushl_h,
4613 .opt_opc = vecop_list,
4614 .vece = MO_16 },
4615 { .fni4 = gen_ushl_i32,
4616 .fniv = gen_ushl_vec,
4617 .opt_opc = vecop_list,
4618 .vece = MO_32 },
4619 { .fni8 = gen_ushl_i64,
4620 .fniv = gen_ushl_vec,
4621 .opt_opc = vecop_list,
4622 .vece = MO_64 },
4624 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
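/*
 * SSHL is the signed counterpart: an out-of-range left shift still
 * gives zero, but an out-of-range right shift must return the sign,
 * so the right-shift count is clamped to esize - 1 with umin before
 * the arithmetic shift, and the final select is made on the sign of
 * the (low byte of the) shift value.
 */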
4627 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4629 TCGv_i32 lval = tcg_temp_new_i32();
4630 TCGv_i32 rval = tcg_temp_new_i32();
4631 TCGv_i32 lsh = tcg_temp_new_i32();
4632 TCGv_i32 rsh = tcg_temp_new_i32();
4633 TCGv_i32 zero = tcg_const_i32(0);
4634 TCGv_i32 max = tcg_const_i32(31);
4637 * Rely on the TCG guarantee that out of range shifts produce
4638 * unspecified results, not undefined behaviour (i.e. no trap).
4639 * Discard out-of-range results after the fact.
4641 tcg_gen_ext8s_i32(lsh, shift);
4642 tcg_gen_neg_i32(rsh, lsh);
4643 tcg_gen_shl_i32(lval, src, lsh);
4644 tcg_gen_umin_i32(rsh, rsh, max);
4645 tcg_gen_sar_i32(rval, src, rsh);
4646 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4647 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4649 tcg_temp_free_i32(lval);
4650 tcg_temp_free_i32(rval);
4651 tcg_temp_free_i32(lsh);
4652 tcg_temp_free_i32(rsh);
4653 tcg_temp_free_i32(zero);
4654 tcg_temp_free_i32(max);
4657 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4659 TCGv_i64 lval = tcg_temp_new_i64();
4660 TCGv_i64 rval = tcg_temp_new_i64();
4661 TCGv_i64 lsh = tcg_temp_new_i64();
4662 TCGv_i64 rsh = tcg_temp_new_i64();
4663 TCGv_i64 zero = tcg_const_i64(0);
4664 TCGv_i64 max = tcg_const_i64(63);
4667 * Rely on the TCG guarantee that out of range shifts produce
4668 * unspecified results, not undefined behaviour (i.e. no trap).
4669 * Discard out-of-range results after the fact.
4671 tcg_gen_ext8s_i64(lsh, shift);
4672 tcg_gen_neg_i64(rsh, lsh);
4673 tcg_gen_shl_i64(lval, src, lsh);
4674 tcg_gen_umin_i64(rsh, rsh, max);
4675 tcg_gen_sar_i64(rval, src, rsh);
4676 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4677 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4679 tcg_temp_free_i64(lval);
4680 tcg_temp_free_i64(rval);
4681 tcg_temp_free_i64(lsh);
4682 tcg_temp_free_i64(rsh);
4683 tcg_temp_free_i64(zero);
4684 tcg_temp_free_i64(max);
4687 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4688 TCGv_vec src, TCGv_vec shift)
4690 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4691 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4692 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4693 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4694 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4697 * Rely on the TCG guarantee that out of range shifts produce
4698 * unspecified results, not undefined behaviour (i.e. no trap).
4699 * Discard out-of-range results after the fact.
4701 tcg_gen_neg_vec(vece, rsh, shift);
4702 if (vece == MO_8) {
4703 tcg_gen_mov_vec(lsh, shift);
4704 } else {
4705 tcg_gen_dupi_vec(vece, tmp, 0xff);
4706 tcg_gen_and_vec(vece, lsh, shift, tmp);
4707 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4710 /* Bound rsh so an out-of-range right shift becomes esize - 1, i.e. pure sign replication (-1 or 0). */
4711 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4712 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4713 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4715 tcg_gen_shlv_vec(vece, lval, src, lsh);
4716 tcg_gen_sarv_vec(vece, rval, src, rsh);
4718 /* Select in-bound left shift. */
4719 tcg_gen_andc_vec(vece, lval, lval, tmp);
4721 /* Select between left and right shift. */
4722 if (vece == MO_8) {
4723 tcg_gen_dupi_vec(vece, tmp, 0);
4724 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4725 } else {
4726 tcg_gen_dupi_vec(vece, tmp, 0x80);
4727 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4730 tcg_temp_free_vec(lval);
4731 tcg_temp_free_vec(rval);
4732 tcg_temp_free_vec(lsh);
4733 tcg_temp_free_vec(rsh);
4734 tcg_temp_free_vec(tmp);
4737 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4738 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4740 static const TCGOpcode vecop_list[] = {
4741 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4742 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4744 static const GVecGen3 ops[4] = {
4745 { .fniv = gen_sshl_vec,
4746 .fno = gen_helper_gvec_sshl_b,
4747 .opt_opc = vecop_list,
4748 .vece = MO_8 },
4749 { .fniv = gen_sshl_vec,
4750 .fno = gen_helper_gvec_sshl_h,
4751 .opt_opc = vecop_list,
4752 .vece = MO_16 },
4753 { .fni4 = gen_sshl_i32,
4754 .fniv = gen_sshl_vec,
4755 .opt_opc = vecop_list,
4756 .vece = MO_32 },
4757 { .fni8 = gen_sshl_i64,
4758 .fniv = gen_sshl_vec,
4759 .opt_opc = vecop_list,
4760 .vece = MO_64 },
4762 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
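/*
 * Saturating add/sub with the cumulative QC flag.  Each expander
 * computes the wrapping result alongside the saturating one and ORs
 * any per-lane difference into the 'sat' operand, which the callers
 * point at vfp.qc; QC therefore ends up nonzero iff some lane
 * saturated.
 */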
4765 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4766 TCGv_vec a, TCGv_vec b)
4768 TCGv_vec x = tcg_temp_new_vec_matching(t);
4769 tcg_gen_add_vec(vece, x, a, b);
4770 tcg_gen_usadd_vec(vece, t, a, b);
4771 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4772 tcg_gen_or_vec(vece, sat, sat, x);
4773 tcg_temp_free_vec(x);
4776 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4777 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4779 static const TCGOpcode vecop_list[] = {
4780 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4782 static const GVecGen4 ops[4] = {
4783 { .fniv = gen_uqadd_vec,
4784 .fno = gen_helper_gvec_uqadd_b,
4785 .write_aofs = true,
4786 .opt_opc = vecop_list,
4787 .vece = MO_8 },
4788 { .fniv = gen_uqadd_vec,
4789 .fno = gen_helper_gvec_uqadd_h,
4790 .write_aofs = true,
4791 .opt_opc = vecop_list,
4792 .vece = MO_16 },
4793 { .fniv = gen_uqadd_vec,
4794 .fno = gen_helper_gvec_uqadd_s,
4795 .write_aofs = true,
4796 .opt_opc = vecop_list,
4797 .vece = MO_32 },
4798 { .fniv = gen_uqadd_vec,
4799 .fno = gen_helper_gvec_uqadd_d,
4800 .write_aofs = true,
4801 .opt_opc = vecop_list,
4802 .vece = MO_64 },
4804 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4805 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4808 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4809 TCGv_vec a, TCGv_vec b)
4811 TCGv_vec x = tcg_temp_new_vec_matching(t);
4812 tcg_gen_add_vec(vece, x, a, b);
4813 tcg_gen_ssadd_vec(vece, t, a, b);
4814 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4815 tcg_gen_or_vec(vece, sat, sat, x);
4816 tcg_temp_free_vec(x);
4819 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4820 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4822 static const TCGOpcode vecop_list[] = {
4823 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4825 static const GVecGen4 ops[4] = {
4826 { .fniv = gen_sqadd_vec,
4827 .fno = gen_helper_gvec_sqadd_b,
4828 .opt_opc = vecop_list,
4829 .write_aofs = true,
4830 .vece = MO_8 },
4831 { .fniv = gen_sqadd_vec,
4832 .fno = gen_helper_gvec_sqadd_h,
4833 .opt_opc = vecop_list,
4834 .write_aofs = true,
4835 .vece = MO_16 },
4836 { .fniv = gen_sqadd_vec,
4837 .fno = gen_helper_gvec_sqadd_s,
4838 .opt_opc = vecop_list,
4839 .write_aofs = true,
4840 .vece = MO_32 },
4841 { .fniv = gen_sqadd_vec,
4842 .fno = gen_helper_gvec_sqadd_d,
4843 .opt_opc = vecop_list,
4844 .write_aofs = true,
4845 .vece = MO_64 },
4847 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4848 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4851 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4852 TCGv_vec a, TCGv_vec b)
4854 TCGv_vec x = tcg_temp_new_vec_matching(t);
4855 tcg_gen_sub_vec(vece, x, a, b);
4856 tcg_gen_ussub_vec(vece, t, a, b);
4857 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4858 tcg_gen_or_vec(vece, sat, sat, x);
4859 tcg_temp_free_vec(x);
4862 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4863 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4865 static const TCGOpcode vecop_list[] = {
4866 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4868 static const GVecGen4 ops[4] = {
4869 { .fniv = gen_uqsub_vec,
4870 .fno = gen_helper_gvec_uqsub_b,
4871 .opt_opc = vecop_list,
4872 .write_aofs = true,
4873 .vece = MO_8 },
4874 { .fniv = gen_uqsub_vec,
4875 .fno = gen_helper_gvec_uqsub_h,
4876 .opt_opc = vecop_list,
4877 .write_aofs = true,
4878 .vece = MO_16 },
4879 { .fniv = gen_uqsub_vec,
4880 .fno = gen_helper_gvec_uqsub_s,
4881 .opt_opc = vecop_list,
4882 .write_aofs = true,
4883 .vece = MO_32 },
4884 { .fniv = gen_uqsub_vec,
4885 .fno = gen_helper_gvec_uqsub_d,
4886 .opt_opc = vecop_list,
4887 .write_aofs = true,
4888 .vece = MO_64 },
4890 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4891 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4894 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4895 TCGv_vec a, TCGv_vec b)
4897 TCGv_vec x = tcg_temp_new_vec_matching(t);
4898 tcg_gen_sub_vec(vece, x, a, b);
4899 tcg_gen_sssub_vec(vece, t, a, b);
4900 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4901 tcg_gen_or_vec(vece, sat, sat, x);
4902 tcg_temp_free_vec(x);
4905 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4906 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4908 static const TCGOpcode vecop_list[] = {
4909 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4911 static const GVecGen4 ops[4] = {
4912 { .fniv = gen_sqsub_vec,
4913 .fno = gen_helper_gvec_sqsub_b,
4914 .opt_opc = vecop_list,
4915 .write_aofs = true,
4916 .vece = MO_8 },
4917 { .fniv = gen_sqsub_vec,
4918 .fno = gen_helper_gvec_sqsub_h,
4919 .opt_opc = vecop_list,
4920 .write_aofs = true,
4921 .vece = MO_16 },
4922 { .fniv = gen_sqsub_vec,
4923 .fno = gen_helper_gvec_sqsub_s,
4924 .opt_opc = vecop_list,
4925 .write_aofs = true,
4926 .vece = MO_32 },
4927 { .fniv = gen_sqsub_vec,
4928 .fno = gen_helper_gvec_sqsub_d,
4929 .opt_opc = vecop_list,
4930 .write_aofs = true,
4931 .vece = MO_64 },
4933 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4934 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
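/*
 * Absolute difference: the vector form is simply max(a, b) - min(a, b);
 * the scalar forms compute both a - b and b - a and use movcond on the
 * signed (SABD) or unsigned (UABD) comparison to keep the positive one.
 */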
4937 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4939 TCGv_i32 t = tcg_temp_new_i32();
4941 tcg_gen_sub_i32(t, a, b);
4942 tcg_gen_sub_i32(d, b, a);
4943 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4944 tcg_temp_free_i32(t);
4947 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4949 TCGv_i64 t = tcg_temp_new_i64();
4951 tcg_gen_sub_i64(t, a, b);
4952 tcg_gen_sub_i64(d, b, a);
4953 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4954 tcg_temp_free_i64(t);
4957 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4959 TCGv_vec t = tcg_temp_new_vec_matching(d);
4961 tcg_gen_smin_vec(vece, t, a, b);
4962 tcg_gen_smax_vec(vece, d, a, b);
4963 tcg_gen_sub_vec(vece, d, d, t);
4964 tcg_temp_free_vec(t);
4967 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4968 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4970 static const TCGOpcode vecop_list[] = {
4971 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4973 static const GVecGen3 ops[4] = {
4974 { .fniv = gen_sabd_vec,
4975 .fno = gen_helper_gvec_sabd_b,
4976 .opt_opc = vecop_list,
4977 .vece = MO_8 },
4978 { .fniv = gen_sabd_vec,
4979 .fno = gen_helper_gvec_sabd_h,
4980 .opt_opc = vecop_list,
4981 .vece = MO_16 },
4982 { .fni4 = gen_sabd_i32,
4983 .fniv = gen_sabd_vec,
4984 .fno = gen_helper_gvec_sabd_s,
4985 .opt_opc = vecop_list,
4986 .vece = MO_32 },
4987 { .fni8 = gen_sabd_i64,
4988 .fniv = gen_sabd_vec,
4989 .fno = gen_helper_gvec_sabd_d,
4990 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4991 .opt_opc = vecop_list,
4992 .vece = MO_64 },
4994 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4997 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4999 TCGv_i32 t = tcg_temp_new_i32();
5001 tcg_gen_sub_i32(t, a, b);
5002 tcg_gen_sub_i32(d, b, a);
5003 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5004 tcg_temp_free_i32(t);
5007 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5009 TCGv_i64 t = tcg_temp_new_i64();
5011 tcg_gen_sub_i64(t, a, b);
5012 tcg_gen_sub_i64(d, b, a);
5013 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5014 tcg_temp_free_i64(t);
5017 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5019 TCGv_vec t = tcg_temp_new_vec_matching(d);
5021 tcg_gen_umin_vec(vece, t, a, b);
5022 tcg_gen_umax_vec(vece, d, a, b);
5023 tcg_gen_sub_vec(vece, d, d, t);
5024 tcg_temp_free_vec(t);
5027 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5028 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5030 static const TCGOpcode vecop_list[] = {
5031 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5033 static const GVecGen3 ops[4] = {
5034 { .fniv = gen_uabd_vec,
5035 .fno = gen_helper_gvec_uabd_b,
5036 .opt_opc = vecop_list,
5037 .vece = MO_8 },
5038 { .fniv = gen_uabd_vec,
5039 .fno = gen_helper_gvec_uabd_h,
5040 .opt_opc = vecop_list,
5041 .vece = MO_16 },
5042 { .fni4 = gen_uabd_i32,
5043 .fniv = gen_uabd_vec,
5044 .fno = gen_helper_gvec_uabd_s,
5045 .opt_opc = vecop_list,
5046 .vece = MO_32 },
5047 { .fni8 = gen_uabd_i64,
5048 .fniv = gen_uabd_vec,
5049 .fno = gen_helper_gvec_uabd_d,
5050 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5051 .opt_opc = vecop_list,
5052 .vece = MO_64 },
5054 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
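/*
 * Absolute difference and accumulate: d += abd(a, b), reusing the
 * difference expansions above.
 */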
5057 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5059 TCGv_i32 t = tcg_temp_new_i32();
5060 gen_sabd_i32(t, a, b);
5061 tcg_gen_add_i32(d, d, t);
5062 tcg_temp_free_i32(t);
5065 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5067 TCGv_i64 t = tcg_temp_new_i64();
5068 gen_sabd_i64(t, a, b);
5069 tcg_gen_add_i64(d, d, t);
5070 tcg_temp_free_i64(t);
5073 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5075 TCGv_vec t = tcg_temp_new_vec_matching(d);
5076 gen_sabd_vec(vece, t, a, b);
5077 tcg_gen_add_vec(vece, d, d, t);
5078 tcg_temp_free_vec(t);
5081 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5082 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5084 static const TCGOpcode vecop_list[] = {
5085 INDEX_op_sub_vec, INDEX_op_add_vec,
5086 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5088 static const GVecGen3 ops[4] = {
5089 { .fniv = gen_saba_vec,
5090 .fno = gen_helper_gvec_saba_b,
5091 .opt_opc = vecop_list,
5092 .load_dest = true,
5093 .vece = MO_8 },
5094 { .fniv = gen_saba_vec,
5095 .fno = gen_helper_gvec_saba_h,
5096 .opt_opc = vecop_list,
5097 .load_dest = true,
5098 .vece = MO_16 },
5099 { .fni4 = gen_saba_i32,
5100 .fniv = gen_saba_vec,
5101 .fno = gen_helper_gvec_saba_s,
5102 .opt_opc = vecop_list,
5103 .load_dest = true,
5104 .vece = MO_32 },
5105 { .fni8 = gen_saba_i64,
5106 .fniv = gen_saba_vec,
5107 .fno = gen_helper_gvec_saba_d,
5108 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5109 .opt_opc = vecop_list,
5110 .load_dest = true,
5111 .vece = MO_64 },
5113 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5116 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5118 TCGv_i32 t = tcg_temp_new_i32();
5119 gen_uabd_i32(t, a, b);
5120 tcg_gen_add_i32(d, d, t);
5121 tcg_temp_free_i32(t);
5124 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5126 TCGv_i64 t = tcg_temp_new_i64();
5127 gen_uabd_i64(t, a, b);
5128 tcg_gen_add_i64(d, d, t);
5129 tcg_temp_free_i64(t);
5132 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5134 TCGv_vec t = tcg_temp_new_vec_matching(d);
5135 gen_uabd_vec(vece, t, a, b);
5136 tcg_gen_add_vec(vece, d, d, t);
5137 tcg_temp_free_vec(t);
5140 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5141 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5143 static const TCGOpcode vecop_list[] = {
5144 INDEX_op_sub_vec, INDEX_op_add_vec,
5145 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5147 static const GVecGen3 ops[4] = {
5148 { .fniv = gen_uaba_vec,
5149 .fno = gen_helper_gvec_uaba_b,
5150 .opt_opc = vecop_list,
5151 .load_dest = true,
5152 .vece = MO_8 },
5153 { .fniv = gen_uaba_vec,
5154 .fno = gen_helper_gvec_uaba_h,
5155 .opt_opc = vecop_list,
5156 .load_dest = true,
5157 .vece = MO_16 },
5158 { .fni4 = gen_uaba_i32,
5159 .fniv = gen_uaba_vec,
5160 .fno = gen_helper_gvec_uaba_s,
5161 .opt_opc = vecop_list,
5162 .load_dest = true,
5163 .vece = MO_32 },
5164 { .fni8 = gen_uaba_i64,
5165 .fniv = gen_uaba_vec,
5166 .fno = gen_helper_gvec_uaba_d,
5167 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5168 .opt_opc = vecop_list,
5169 .load_dest = true,
5170 .vece = MO_64 },
5172 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5175 /* Translate a NEON data processing instruction. Return nonzero if the
5176 instruction is invalid.
5177 We process data in a mixture of 32-bit and 64-bit chunks.
5178 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
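/*
 * Decode sketch for what is still handled in this legacy function:
 * three-registers-of-different-lengths is reduced to VQDMLAL, VQDMLSL,
 * VQDMULL and polynomial VMULL (the rest, as the table below notes,
 * now go through decodetree and so return 1 here); then
 * two-registers-and-a-scalar, VEXT, the two-register-misc group,
 * VTBL/VTBX and VDUP.
 */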
5180 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5182 int op;
5183 int q;
5184 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5185 int size;
5186 int pass;
5187 int u;
5188 int vec_size;
5189 uint32_t imm;
5190 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5191 TCGv_ptr ptr1;
5192 TCGv_i64 tmp64;
5194 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5195 return 1;
5198 /* FIXME: this access check should not take precedence over UNDEF
5199 * for invalid encodings; we will generate incorrect syndrome information
5200 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5202 if (s->fp_excp_el) {
5203 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5204 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5205 return 0;
5208 if (!s->vfp_enabled)
5209 return 1;
5210 q = (insn & (1 << 6)) != 0;
5211 u = (insn >> 24) & 1;
5212 VFP_DREG_D(rd, insn);
5213 VFP_DREG_N(rn, insn);
5214 VFP_DREG_M(rm, insn);
5215 size = (insn >> 20) & 3;
5216 vec_size = q ? 16 : 8;
5217 rd_ofs = neon_reg_offset(rd, 0);
5218 rn_ofs = neon_reg_offset(rn, 0);
5219 rm_ofs = neon_reg_offset(rm, 0);
5221 if ((insn & (1 << 23)) == 0) {
5222 /* Three register same length: handled by decodetree */
5223 return 1;
5224 } else if (insn & (1 << 4)) {
5225 /* Two registers and shift or reg and imm: handled by decodetree */
5226 return 1;
5227 } else { /* (insn & 0x00800010 == 0x00800000) */
5228 if (size != 3) {
5229 op = (insn >> 8) & 0xf;
5230 if ((insn & (1 << 6)) == 0) {
5231 /* Three registers of different lengths. */
5232 /* undefreq: bit 0 : UNDEF if size == 0
5233 * bit 1 : UNDEF if size == 1
5234 * bit 2 : UNDEF if size == 2
5235 * bit 3 : UNDEF if U == 1
5236 * Note that [2:0] set implies 'always UNDEF'
5238 int undefreq;
5239 /* prewiden, src1_wide, src2_wide, undefreq */
5240 static const int neon_3reg_wide[16][4] = {
5241 {0, 0, 0, 7}, /* VADDL: handled by decodetree */
5242 {0, 0, 0, 7}, /* VADDW: handled by decodetree */
5243 {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
5244 {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
5245 {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
5246 {0, 0, 0, 7}, /* VABAL */
5247 {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
5248 {0, 0, 0, 7}, /* VABDL */
5249 {0, 0, 0, 7}, /* VMLAL */
5250 {0, 0, 0, 9}, /* VQDMLAL */
5251 {0, 0, 0, 7}, /* VMLSL */
5252 {0, 0, 0, 9}, /* VQDMLSL */
5253 {0, 0, 0, 7}, /* Integer VMULL */
5254 {0, 0, 0, 9}, /* VQDMULL */
5255 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5256 {0, 0, 0, 7}, /* Reserved: always UNDEF */
5259 undefreq = neon_3reg_wide[op][3];
5261 if ((undefreq & (1 << size)) ||
5262 ((undefreq & 8) && u)) {
5263 return 1;
5265 if (rd & 1) {
5266 return 1;
5269 /* Handle polynomial VMULL in a single pass. */
5270 if (op == 14) {
5271 if (size == 0) {
5272 /* VMULL.P8 */
5273 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5274 0, gen_helper_neon_pmull_h);
5275 } else {
5276 /* VMULL.P64 */
5277 if (!dc_isar_feature(aa32_pmull, s)) {
5278 return 1;
5280 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5281 0, gen_helper_gvec_pmull_q);
5283 return 0;
5286 /* Avoid overlapping operands. Wide source operands are
5287 always aligned so will never overlap with wide
5288 destinations in problematic ways. */
5289 if (rd == rm) {
5290 tmp = neon_load_reg(rm, 1);
5291 neon_store_scratch(2, tmp);
5292 } else if (rd == rn) {
5293 tmp = neon_load_reg(rn, 1);
5294 neon_store_scratch(2, tmp);
5296 tmp3 = NULL;
5297 for (pass = 0; pass < 2; pass++) {
5298 if (pass == 1 && rd == rn) {
5299 tmp = neon_load_scratch(2);
5300 } else {
5301 tmp = neon_load_reg(rn, pass);
5303 if (pass == 1 && rd == rm) {
5304 tmp2 = neon_load_scratch(2);
5305 } else {
5306 tmp2 = neon_load_reg(rm, pass);
5308 switch (op) {
5309 case 9: case 11: case 13:
5310 /* VQDMLAL, VQDMLSL, VQDMULL */
5311 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5312 break;
5313 default: /* 15 is RESERVED: caught earlier */
5314 abort();
5316 if (op == 13) {
5317 /* VQDMULL */
5318 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5319 neon_store_reg64(cpu_V0, rd + pass);
5320 } else {
5321 /* Accumulate. */
5322 neon_load_reg64(cpu_V1, rd + pass);
5323 switch (op) {
5324 case 9: case 11: /* VQDMLAL, VQDMLSL */
5325 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5326 if (op == 11) {
5327 gen_neon_negl(cpu_V0, size);
5329 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5330 break;
5331 default:
5332 abort();
5334 neon_store_reg64(cpu_V0, rd + pass);
5337 } else {
5338 /* Two registers and a scalar. NB that for ops of this form
5339 * the ARM ARM labels bit 24 as Q, but it is in our variable
5340 * 'u', not 'q'.
5342 if (size == 0) {
5343 return 1;
5345 switch (op) {
5346 case 1: /* Floating point VMLA scalar */
5347 case 5: /* Floating point VMLS scalar */
5348 case 9: /* Floating point VMUL scalar */
5349 if (size == 1) {
5350 return 1;
5352 /* fall through */
5353 case 0: /* Integer VMLA scalar */
5354 case 4: /* Integer VMLS scalar */
5355 case 8: /* Integer VMUL scalar */
5356 case 12: /* VQDMULH scalar */
5357 case 13: /* VQRDMULH scalar */
5358 if (u && ((rd | rn) & 1)) {
5359 return 1;
5361 tmp = neon_get_scalar(size, rm);
5362 neon_store_scratch(0, tmp);
5363 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5364 tmp = neon_load_scratch(0);
5365 tmp2 = neon_load_reg(rn, pass);
5366 if (op == 12) {
5367 if (size == 1) {
5368 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5369 } else {
5370 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5372 } else if (op == 13) {
5373 if (size == 1) {
5374 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5375 } else {
5376 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5378 } else if (op & 1) {
5379 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5380 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5381 tcg_temp_free_ptr(fpstatus);
5382 } else {
5383 switch (size) {
5384 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5385 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5386 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5387 default: abort();
5390 tcg_temp_free_i32(tmp2);
5391 if (op < 8) {
5392 /* Accumulate. */
5393 tmp2 = neon_load_reg(rd, pass);
5394 switch (op) {
5395 case 0:
5396 gen_neon_add(size, tmp, tmp2);
5397 break;
5398 case 1:
5400 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5401 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5402 tcg_temp_free_ptr(fpstatus);
5403 break;
5405 case 4:
5406 gen_neon_rsb(size, tmp, tmp2);
5407 break;
5408 case 5:
5410 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5411 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5412 tcg_temp_free_ptr(fpstatus);
5413 break;
5415 default:
5416 abort();
5418 tcg_temp_free_i32(tmp2);
5420 neon_store_reg(rd, pass, tmp);
5422 break;
5423 case 3: /* VQDMLAL scalar */
5424 case 7: /* VQDMLSL scalar */
5425 case 11: /* VQDMULL scalar */
5426 if (u == 1) {
5427 return 1;
5429 /* fall through */
5430 case 2: /* VMLAL scalar */
5431 case 6: /* VMLSL scalar */
5432 case 10: /* VMULL scalar */
5433 if (rd & 1) {
5434 return 1;
5436 tmp2 = neon_get_scalar(size, rm);
5437 /* We need a copy of tmp2 because gen_neon_mull
5438 * deletes it during pass 0. */
5439 tmp4 = tcg_temp_new_i32();
5440 tcg_gen_mov_i32(tmp4, tmp2);
5441 tmp3 = neon_load_reg(rn, 1);
5443 for (pass = 0; pass < 2; pass++) {
5444 if (pass == 0) {
5445 tmp = neon_load_reg(rn, 0);
5446 } else {
5447 tmp = tmp3;
5448 tmp2 = tmp4;
5450 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5451 if (op != 11) {
5452 neon_load_reg64(cpu_V1, rd + pass);
5454 switch (op) {
5455 case 6:
5456 gen_neon_negl(cpu_V0, size);
5457 /* Fall through */
5458 case 2:
5459 gen_neon_addl(size);
5460 break;
5461 case 3: case 7:
5462 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5463 if (op == 7) {
5464 gen_neon_negl(cpu_V0, size);
5466 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5467 break;
5468 case 10:
5469 /* no-op */
5470 break;
5471 case 11:
5472 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5473 break;
5474 default:
5475 abort();
5477 neon_store_reg64(cpu_V0, rd + pass);
5479 break;
5480 case 14: /* VQRDMLAH scalar */
5481 case 15: /* VQRDMLSH scalar */
5483 NeonGenThreeOpEnvFn *fn;
5485 if (!dc_isar_feature(aa32_rdm, s)) {
5486 return 1;
5488 if (u && ((rd | rn) & 1)) {
5489 return 1;
5491 if (op == 14) {
5492 if (size == 1) {
5493 fn = gen_helper_neon_qrdmlah_s16;
5494 } else {
5495 fn = gen_helper_neon_qrdmlah_s32;
5497 } else {
5498 if (size == 1) {
5499 fn = gen_helper_neon_qrdmlsh_s16;
5500 } else {
5501 fn = gen_helper_neon_qrdmlsh_s32;
5505 tmp2 = neon_get_scalar(size, rm);
5506 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5507 tmp = neon_load_reg(rn, pass);
5508 tmp3 = neon_load_reg(rd, pass);
5509 fn(tmp, cpu_env, tmp, tmp2, tmp3);
5510 tcg_temp_free_i32(tmp3);
5511 neon_store_reg(rd, pass, tmp);
5513 tcg_temp_free_i32(tmp2);
5515 break;
5516 default:
5517 g_assert_not_reached();
5520 } else { /* size == 3 */
5521 if (!u) {
5522 /* Extract. */
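/*
 * VEXT: conceptually concatenates Vm:Vn (with Vn supplying the least
 * significant bytes) and extracts the full 8- or 16-byte result
 * starting at byte 'imm', implemented below with 64-bit shifts and
 * ORs across the one or two doubleword pairs.
 */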
5523 imm = (insn >> 8) & 0xf;
5525 if (imm > 7 && !q)
5526 return 1;
5528 if (q && ((rd | rn | rm) & 1)) {
5529 return 1;
5532 if (imm == 0) {
5533 neon_load_reg64(cpu_V0, rn);
5534 if (q) {
5535 neon_load_reg64(cpu_V1, rn + 1);
5537 } else if (imm == 8) {
5538 neon_load_reg64(cpu_V0, rn + 1);
5539 if (q) {
5540 neon_load_reg64(cpu_V1, rm);
5542 } else if (q) {
5543 tmp64 = tcg_temp_new_i64();
5544 if (imm < 8) {
5545 neon_load_reg64(cpu_V0, rn);
5546 neon_load_reg64(tmp64, rn + 1);
5547 } else {
5548 neon_load_reg64(cpu_V0, rn + 1);
5549 neon_load_reg64(tmp64, rm);
5551 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5552 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5553 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5554 if (imm < 8) {
5555 neon_load_reg64(cpu_V1, rm);
5556 } else {
5557 neon_load_reg64(cpu_V1, rm + 1);
5558 imm -= 8;
5560 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5561 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
5562 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
5563 tcg_temp_free_i64(tmp64);
5564 } else {
5565 /* BUGFIX */
5566 neon_load_reg64(cpu_V0, rn);
5567 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5568 neon_load_reg64(cpu_V1, rm);
5569 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5570 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5572 neon_store_reg64(cpu_V0, rd);
5573 if (q) {
5574 neon_store_reg64(cpu_V1, rd + 1);
5576 } else if ((insn & (1 << 11)) == 0) {
5577 /* Two register misc. */
5578 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5579 size = (insn >> 18) & 3;
5580 /* UNDEF for unknown op values and bad op-size combinations */
5581 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
5582 return 1;
5584 if (neon_2rm_is_v8_op(op) &&
5585 !arm_dc_feature(s, ARM_FEATURE_V8)) {
5586 return 1;
5588 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
5589 q && ((rm | rd) & 1)) {
5590 return 1;
5592 switch (op) {
5593 case NEON_2RM_VREV64:
5594 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5595 tmp = neon_load_reg(rm, pass * 2);
5596 tmp2 = neon_load_reg(rm, pass * 2 + 1);
5597 switch (size) {
5598 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5599 case 1: gen_swap_half(tmp); break;
5600 case 2: /* no-op */ break;
5601 default: abort();
5603 neon_store_reg(rd, pass * 2 + 1, tmp);
5604 if (size == 2) {
5605 neon_store_reg(rd, pass * 2, tmp2);
5606 } else {
5607 switch (size) {
5608 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
5609 case 1: gen_swap_half(tmp2); break;
5610 default: abort();
5612 neon_store_reg(rd, pass * 2, tmp2);
5615 break;
5616 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
5617 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
5618 for (pass = 0; pass < q + 1; pass++) {
5619 tmp = neon_load_reg(rm, pass * 2);
5620 gen_neon_widen(cpu_V0, tmp, size, op & 1);
5621 tmp = neon_load_reg(rm, pass * 2 + 1);
5622 gen_neon_widen(cpu_V1, tmp, size, op & 1);
5623 switch (size) {
5624 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5625 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5626 case 2: tcg_gen_add_i64(CPU_V001); break;
5627 default: abort();
5629 if (op >= NEON_2RM_VPADAL) {
5630 /* Accumulate. */
5631 neon_load_reg64(cpu_V1, rd + pass);
5632 gen_neon_addl(size);
5634 neon_store_reg64(cpu_V0, rd + pass);
5636 break;
5637 case NEON_2RM_VTRN:
5638 if (size == 2) {
5639 int n;
5640 for (n = 0; n < (q ? 4 : 2); n += 2) {
5641 tmp = neon_load_reg(rm, n);
5642 tmp2 = neon_load_reg(rd, n + 1);
5643 neon_store_reg(rm, n, tmp2);
5644 neon_store_reg(rd, n + 1, tmp);
5646 } else {
5647 goto elementwise;
5649 break;
5650 case NEON_2RM_VUZP:
5651 if (gen_neon_unzip(rd, rm, size, q)) {
5652 return 1;
5654 break;
5655 case NEON_2RM_VZIP:
5656 if (gen_neon_zip(rd, rm, size, q)) {
5657 return 1;
5659 break;
5660 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
5661 /* also VQMOVUN; op field and mnemonics don't line up */
5662 if (rm & 1) {
5663 return 1;
5665 tmp2 = NULL;
5666 for (pass = 0; pass < 2; pass++) {
5667 neon_load_reg64(cpu_V0, rm + pass);
5668 tmp = tcg_temp_new_i32();
5669 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
5670 tmp, cpu_V0);
5671 if (pass == 0) {
5672 tmp2 = tmp;
5673 } else {
5674 neon_store_reg(rd, 0, tmp2);
5675 neon_store_reg(rd, 1, tmp);
5678 break;
5679 case NEON_2RM_VSHLL:
5680 if (q || (rd & 1)) {
5681 return 1;
5683 tmp = neon_load_reg(rm, 0);
5684 tmp2 = neon_load_reg(rm, 1);
5685 for (pass = 0; pass < 2; pass++) {
5686 if (pass == 1)
5687 tmp = tmp2;
5688 gen_neon_widen(cpu_V0, tmp, size, 1);
5689 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
5690 neon_store_reg64(cpu_V0, rd + pass);
5692 break;
5693 case NEON_2RM_VCVT_F16_F32:
5695 TCGv_ptr fpst;
5696 TCGv_i32 ahp;
5698 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
5699 q || (rm & 1)) {
5700 return 1;
5702 fpst = get_fpstatus_ptr(true);
5703 ahp = get_ahp_flag();
5704 tmp = neon_load_reg(rm, 0);
5705 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
5706 tmp2 = neon_load_reg(rm, 1);
5707 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
5708 tcg_gen_shli_i32(tmp2, tmp2, 16);
5709 tcg_gen_or_i32(tmp2, tmp2, tmp);
5710 tcg_temp_free_i32(tmp);
5711 tmp = neon_load_reg(rm, 2);
5712 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
5713 tmp3 = neon_load_reg(rm, 3);
5714 neon_store_reg(rd, 0, tmp2);
5715 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
5716 tcg_gen_shli_i32(tmp3, tmp3, 16);
5717 tcg_gen_or_i32(tmp3, tmp3, tmp);
5718 neon_store_reg(rd, 1, tmp3);
5719 tcg_temp_free_i32(tmp);
5720 tcg_temp_free_i32(ahp);
5721 tcg_temp_free_ptr(fpst);
5722 break;
5724 case NEON_2RM_VCVT_F32_F16:
5726 TCGv_ptr fpst;
5727 TCGv_i32 ahp;
5728 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
5729 q || (rd & 1)) {
5730 return 1;
5732 fpst = get_fpstatus_ptr(true);
5733 ahp = get_ahp_flag();
5734 tmp3 = tcg_temp_new_i32();
5735 tmp = neon_load_reg(rm, 0);
5736 tmp2 = neon_load_reg(rm, 1);
5737 tcg_gen_ext16u_i32(tmp3, tmp);
5738 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
5739 neon_store_reg(rd, 0, tmp3);
5740 tcg_gen_shri_i32(tmp, tmp, 16);
5741 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
5742 neon_store_reg(rd, 1, tmp);
5743 tmp3 = tcg_temp_new_i32();
5744 tcg_gen_ext16u_i32(tmp3, tmp2);
5745 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
5746 neon_store_reg(rd, 2, tmp3);
5747 tcg_gen_shri_i32(tmp2, tmp2, 16);
5748 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
5749 neon_store_reg(rd, 3, tmp2);
5750 tcg_temp_free_i32(ahp);
5751 tcg_temp_free_ptr(fpst);
5752 break;
5754 case NEON_2RM_AESE: case NEON_2RM_AESMC:
5755 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
5756 return 1;
5759 * Bit 6 is the lowest opcode bit; it distinguishes
5760 * between encryption (AESE/AESMC) and decryption
5761 * (AESD/AESIMC).
5763 if (op == NEON_2RM_AESE) {
5764 tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd),
5765 vfp_reg_offset(true, rd),
5766 vfp_reg_offset(true, rm),
5767 16, 16, extract32(insn, 6, 1),
5768 gen_helper_crypto_aese);
5769 } else {
5770 tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd),
5771 vfp_reg_offset(true, rm),
5772 16, 16, extract32(insn, 6, 1),
5773 gen_helper_crypto_aesmc);
5775 break;
5776 case NEON_2RM_SHA1H:
5777 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
5778 return 1;
5780 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
5781 gen_helper_crypto_sha1h);
5782 break;
5783 case NEON_2RM_SHA1SU1:
5784 if ((rm | rd) & 1) {
5785 return 1;
5787 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
5788 if (q) {
5789 if (!dc_isar_feature(aa32_sha2, s)) {
5790 return 1;
5792 } else if (!dc_isar_feature(aa32_sha1, s)) {
5793 return 1;
5795 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
5796 q ? gen_helper_crypto_sha256su0
5797 : gen_helper_crypto_sha1su1);
5798 break;
5799 case NEON_2RM_VMVN:
5800 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
5801 break;
5802 case NEON_2RM_VNEG:
5803 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
5804 break;
5805 case NEON_2RM_VABS:
5806 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
5807 break;
5809 case NEON_2RM_VCEQ0:
5810 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5811 break;
5812 case NEON_2RM_VCGT0:
5813 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5814 break;
5815 case NEON_2RM_VCLE0:
5816 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5817 break;
5818 case NEON_2RM_VCGE0:
5819 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5820 break;
5821 case NEON_2RM_VCLT0:
5822 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5823 break;
5825 default:
5826 elementwise:
5827 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5828 tmp = neon_load_reg(rm, pass);
5829 switch (op) {
5830 case NEON_2RM_VREV32:
5831 switch (size) {
5832 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5833 case 1: gen_swap_half(tmp); break;
5834 default: abort();
5836 break;
5837 case NEON_2RM_VREV16:
5838 gen_rev16(tmp, tmp);
5839 break;
5840 case NEON_2RM_VCLS:
5841 switch (size) {
5842 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
5843 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
5844 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
5845 default: abort();
5847 break;
5848 case NEON_2RM_VCLZ:
5849 switch (size) {
5850 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
5851 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
5852 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
5853 default: abort();
5855 break;
5856 case NEON_2RM_VCNT:
5857 gen_helper_neon_cnt_u8(tmp, tmp);
5858 break;
5859 case NEON_2RM_VQABS:
5860 switch (size) {
5861 case 0:
5862 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
5863 break;
5864 case 1:
5865 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
5866 break;
5867 case 2:
5868 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
5869 break;
5870 default: abort();
5872 break;
5873 case NEON_2RM_VQNEG:
5874 switch (size) {
5875 case 0:
5876 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
5877 break;
5878 case 1:
5879 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
5880 break;
5881 case 2:
5882 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
5883 break;
5884 default: abort();
5886 break;
5887 case NEON_2RM_VCGT0_F:
5889 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5890 tmp2 = tcg_const_i32(0);
5891 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5892 tcg_temp_free_i32(tmp2);
5893 tcg_temp_free_ptr(fpstatus);
5894 break;
5896 case NEON_2RM_VCGE0_F:
5898 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5899 tmp2 = tcg_const_i32(0);
5900 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5901 tcg_temp_free_i32(tmp2);
5902 tcg_temp_free_ptr(fpstatus);
5903 break;
5905 case NEON_2RM_VCEQ0_F:
5907 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5908 tmp2 = tcg_const_i32(0);
5909 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5910 tcg_temp_free_i32(tmp2);
5911 tcg_temp_free_ptr(fpstatus);
5912 break;
5914 case NEON_2RM_VCLE0_F:
5916 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5917 tmp2 = tcg_const_i32(0);
5918 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
5919 tcg_temp_free_i32(tmp2);
5920 tcg_temp_free_ptr(fpstatus);
5921 break;
5923 case NEON_2RM_VCLT0_F:
5925 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5926 tmp2 = tcg_const_i32(0);
5927 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
5928 tcg_temp_free_i32(tmp2);
5929 tcg_temp_free_ptr(fpstatus);
5930 break;
5932 case NEON_2RM_VABS_F:
5933 gen_helper_vfp_abss(tmp, tmp);
5934 break;
5935 case NEON_2RM_VNEG_F:
5936 gen_helper_vfp_negs(tmp, tmp);
5937 break;
5938 case NEON_2RM_VSWP:
5939 tmp2 = neon_load_reg(rd, pass);
5940 neon_store_reg(rm, pass, tmp2);
5941 break;
5942 case NEON_2RM_VTRN:
5943 tmp2 = neon_load_reg(rd, pass);
5944 switch (size) {
5945 case 0: gen_neon_trn_u8(tmp, tmp2); break;
5946 case 1: gen_neon_trn_u16(tmp, tmp2); break;
5947 default: abort();
5949 neon_store_reg(rm, pass, tmp2);
5950 break;
5951 case NEON_2RM_VRINTN:
5952 case NEON_2RM_VRINTA:
5953 case NEON_2RM_VRINTM:
5954 case NEON_2RM_VRINTP:
5955 case NEON_2RM_VRINTZ:
5957 TCGv_i32 tcg_rmode;
5958 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5959 int rmode;
5961 if (op == NEON_2RM_VRINTZ) {
5962 rmode = FPROUNDING_ZERO;
5963 } else {
5964 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
5967 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5968 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5969 cpu_env);
5970 gen_helper_rints(tmp, tmp, fpstatus);
5971 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5972 cpu_env);
5973 tcg_temp_free_ptr(fpstatus);
5974 tcg_temp_free_i32(tcg_rmode);
5975 break;
5977 case NEON_2RM_VRINTX:
5979 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5980 gen_helper_rints_exact(tmp, tmp, fpstatus);
5981 tcg_temp_free_ptr(fpstatus);
5982 break;
5984 case NEON_2RM_VCVTAU:
5985 case NEON_2RM_VCVTAS:
5986 case NEON_2RM_VCVTNU:
5987 case NEON_2RM_VCVTNS:
5988 case NEON_2RM_VCVTPU:
5989 case NEON_2RM_VCVTPS:
5990 case NEON_2RM_VCVTMU:
5991 case NEON_2RM_VCVTMS:
5993 bool is_signed = !extract32(insn, 7, 1);
5994 TCGv_ptr fpst = get_fpstatus_ptr(1);
5995 TCGv_i32 tcg_rmode, tcg_shift;
5996 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
5998 tcg_shift = tcg_const_i32(0);
5999 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6000 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6001 cpu_env);
6003 if (is_signed) {
6004 gen_helper_vfp_tosls(tmp, tmp,
6005 tcg_shift, fpst);
6006 } else {
6007 gen_helper_vfp_touls(tmp, tmp,
6008 tcg_shift, fpst);
6011 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6012 cpu_env);
6013 tcg_temp_free_i32(tcg_rmode);
6014 tcg_temp_free_i32(tcg_shift);
6015 tcg_temp_free_ptr(fpst);
6016 break;
6018 case NEON_2RM_VRECPE:
6019 gen_helper_recpe_u32(tmp, tmp);
6020 break;
6021 case NEON_2RM_VRSQRTE:
6022 gen_helper_rsqrte_u32(tmp, tmp);
6023 break;
6024 case NEON_2RM_VRECPE_F:
6026 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6027 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6028 tcg_temp_free_ptr(fpstatus);
6029 break;
6031 case NEON_2RM_VRSQRTE_F:
6033 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6034 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6035 tcg_temp_free_ptr(fpstatus);
6036 break;
6038 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6040 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6041 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6042 tcg_temp_free_ptr(fpstatus);
6043 break;
6045 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6047 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6048 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6049 tcg_temp_free_ptr(fpstatus);
6050 break;
6052 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6054 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6055 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6056 tcg_temp_free_ptr(fpstatus);
6057 break;
6059 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6061 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6062 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6063 tcg_temp_free_ptr(fpstatus);
6064 break;
6066 default:
6067 /* Reserved op values were caught by the
6068 * neon_2rm_sizes[] check earlier.
6069 */
6070 abort();
6072 neon_store_reg(rd, pass, tmp);
6074 break;
6076 } else if ((insn & (1 << 10)) == 0) {
6077 /* VTBL, VTBX. */
6078 int n = ((insn >> 8) & 3) + 1;
6079 if ((rn + n) > 32) {
6080 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6081 * helper function running off the end of the register file.
6082 */
6083 return 1;
6085 n <<= 3;
6086 if (insn & (1 << 6)) {
6087 tmp = neon_load_reg(rd, 0);
6088 } else {
6089 tmp = tcg_temp_new_i32();
6090 tcg_gen_movi_i32(tmp, 0);
6092 tmp2 = neon_load_reg(rm, 0);
6093 ptr1 = vfp_reg_ptr(true, rn);
6094 tmp5 = tcg_const_i32(n);
6095 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6096 tcg_temp_free_i32(tmp);
6097 if (insn & (1 << 6)) {
6098 tmp = neon_load_reg(rd, 1);
6099 } else {
6100 tmp = tcg_temp_new_i32();
6101 tcg_gen_movi_i32(tmp, 0);
6103 tmp3 = neon_load_reg(rm, 1);
6104 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6105 tcg_temp_free_i32(tmp5);
6106 tcg_temp_free_ptr(ptr1);
6107 neon_store_reg(rd, 0, tmp2);
6108 neon_store_reg(rd, 1, tmp3);
6109 tcg_temp_free_i32(tmp);
6110 } else if ((insn & 0x380) == 0) {
6111 /* VDUP */
6112 int element;
6113 MemOp size;
6115 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6116 return 1;
6118 if (insn & (1 << 16)) {
6119 size = MO_8;
6120 element = (insn >> 17) & 7;
6121 } else if (insn & (1 << 17)) {
6122 size = MO_16;
6123 element = (insn >> 18) & 3;
6124 } else {
6125 size = MO_32;
6126 element = (insn >> 19) & 1;
6128 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6129 neon_element_offset(rm, element, size),
6130 q ? 16 : 8, q ? 16 : 8);
6131 } else {
6132 return 1;
6136 return 0;
6139 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6141 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6142 const ARMCPRegInfo *ri;
6144 cpnum = (insn >> 8) & 0xf;
6146 /* First check for coprocessor space used for XScale/iwMMXt insns */
6147 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6148 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6149 return 1;
6151 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6152 return disas_iwmmxt_insn(s, insn);
6153 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6154 return disas_dsp_insn(s, insn);
6156 return 1;
6159 /* Otherwise treat as a generic register access */
6160 is64 = (insn & (1 << 25)) == 0;
6161 if (!is64 && ((insn & (1 << 4)) == 0)) {
6162 /* cdp */
6163 return 1;
6166 crm = insn & 0xf;
6167 if (is64) {
6168 crn = 0;
6169 opc1 = (insn >> 4) & 0xf;
6170 opc2 = 0;
6171 rt2 = (insn >> 16) & 0xf;
6172 } else {
6173 crn = (insn >> 16) & 0xf;
6174 opc1 = (insn >> 21) & 7;
6175 opc2 = (insn >> 5) & 7;
6176 rt2 = 0;
6178 isread = (insn >> 20) & 1;
6179 rt = (insn >> 12) & 0xf;
6181 ri = get_arm_cp_reginfo(s->cp_regs,
6182 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6183 if (ri) {
6184 bool need_exit_tb;
6186 /* Check access permissions */
6187 if (!cp_access_ok(s->current_el, ri, isread)) {
6188 return 1;
6191 if (s->hstr_active || ri->accessfn ||
6192 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6193 /* Emit code to perform further access permissions checks at
6194 * runtime; this may result in an exception.
6195 * Note that on XScale all cp0..c13 registers do an access check
6196 * call in order to handle c15_cpar.
6197 */
6198 TCGv_ptr tmpptr;
6199 TCGv_i32 tcg_syn, tcg_isread;
6200 uint32_t syndrome;
6202 /* Note that since we are an implementation which takes an
6203 * exception on a trapped conditional instruction only if the
6204 * instruction passes its condition code check, we can take
6205 * advantage of the clause in the ARM ARM that allows us to set
6206 * the COND field in the instruction to 0xE in all cases.
6207 * We could fish the actual condition out of the insn (ARM)
6208 * or the condexec bits (Thumb) but it isn't necessary.
6209 */
6210 switch (cpnum) {
6211 case 14:
6212 if (is64) {
6213 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6214 isread, false);
6215 } else {
6216 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6217 rt, isread, false);
6219 break;
6220 case 15:
6221 if (is64) {
6222 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6223 isread, false);
6224 } else {
6225 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6226 rt, isread, false);
6228 break;
6229 default:
6230 /* ARMv8 defines that only coprocessors 14 and 15 exist,
6231 * so this can only happen if this is an ARMv7 or earlier CPU,
6232 * in which case the syndrome information won't actually be
6233 * guest visible.
6234 */
6235 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6236 syndrome = syn_uncategorized();
6237 break;
6240 gen_set_condexec(s);
6241 gen_set_pc_im(s, s->pc_curr);
6242 tmpptr = tcg_const_ptr(ri);
6243 tcg_syn = tcg_const_i32(syndrome);
6244 tcg_isread = tcg_const_i32(isread);
6245 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6246 tcg_isread);
6247 tcg_temp_free_ptr(tmpptr);
6248 tcg_temp_free_i32(tcg_syn);
6249 tcg_temp_free_i32(tcg_isread);
6250 } else if (ri->type & ARM_CP_RAISES_EXC) {
6251 /*
6252 * The readfn or writefn might raise an exception;
6253 * synchronize the CPU state in case it does.
6254 */
6255 gen_set_condexec(s);
6256 gen_set_pc_im(s, s->pc_curr);
6259 /* Handle special cases first */
6260 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6261 case ARM_CP_NOP:
6262 return 0;
6263 case ARM_CP_WFI:
6264 if (isread) {
6265 return 1;
6267 gen_set_pc_im(s, s->base.pc_next);
6268 s->base.is_jmp = DISAS_WFI;
6269 return 0;
6270 default:
6271 break;
6274 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
6275 gen_io_start();
6278 if (isread) {
6279 /* Read */
6280 if (is64) {
6281 TCGv_i64 tmp64;
6282 TCGv_i32 tmp;
6283 if (ri->type & ARM_CP_CONST) {
6284 tmp64 = tcg_const_i64(ri->resetvalue);
6285 } else if (ri->readfn) {
6286 TCGv_ptr tmpptr;
6287 tmp64 = tcg_temp_new_i64();
6288 tmpptr = tcg_const_ptr(ri);
6289 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6290 tcg_temp_free_ptr(tmpptr);
6291 } else {
6292 tmp64 = tcg_temp_new_i64();
6293 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
6295 tmp = tcg_temp_new_i32();
6296 tcg_gen_extrl_i64_i32(tmp, tmp64);
6297 store_reg(s, rt, tmp);
6298 tmp = tcg_temp_new_i32();
6299 tcg_gen_extrh_i64_i32(tmp, tmp64);
6300 tcg_temp_free_i64(tmp64);
6301 store_reg(s, rt2, tmp);
6302 } else {
6303 TCGv_i32 tmp;
6304 if (ri->type & ARM_CP_CONST) {
6305 tmp = tcg_const_i32(ri->resetvalue);
6306 } else if (ri->readfn) {
6307 TCGv_ptr tmpptr;
6308 tmp = tcg_temp_new_i32();
6309 tmpptr = tcg_const_ptr(ri);
6310 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
6311 tcg_temp_free_ptr(tmpptr);
6312 } else {
6313 tmp = load_cpu_offset(ri->fieldoffset);
6315 if (rt == 15) {
6316 /* A destination register of r15 for 32-bit loads sets
6317 * the condition codes from the high 4 bits of the loaded value.
6318 */
6319 gen_set_nzcv(tmp);
6320 tcg_temp_free_i32(tmp);
6321 } else {
6322 store_reg(s, rt, tmp);
6325 } else {
6326 /* Write */
6327 if (ri->type & ARM_CP_CONST) {
6328 /* If not forbidden by access permissions, treat as WI */
6329 return 0;
6332 if (is64) {
6333 TCGv_i32 tmplo, tmphi;
6334 TCGv_i64 tmp64 = tcg_temp_new_i64();
6335 tmplo = load_reg(s, rt);
6336 tmphi = load_reg(s, rt2);
6337 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
6338 tcg_temp_free_i32(tmplo);
6339 tcg_temp_free_i32(tmphi);
6340 if (ri->writefn) {
6341 TCGv_ptr tmpptr = tcg_const_ptr(ri);
6342 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
6343 tcg_temp_free_ptr(tmpptr);
6344 } else {
6345 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
6347 tcg_temp_free_i64(tmp64);
6348 } else {
6349 if (ri->writefn) {
6350 TCGv_i32 tmp;
6351 TCGv_ptr tmpptr;
6352 tmp = load_reg(s, rt);
6353 tmpptr = tcg_const_ptr(ri);
6354 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
6355 tcg_temp_free_ptr(tmpptr);
6356 tcg_temp_free_i32(tmp);
6357 } else {
6358 TCGv_i32 tmp = load_reg(s, rt);
6359 store_cpu_offset(tmp, ri->fieldoffset);
6364 /* I/O operations must end the TB here (whether read or write) */
6365 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
6366 (ri->type & ARM_CP_IO));
6368 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
6369 /*
6370 * A write to any coprocessor register that ends a TB
6371 * must rebuild the hflags for the next TB.
6372 */
6373 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
6374 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6375 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
6376 } else {
6377 if (ri->type & ARM_CP_NEWEL) {
6378 gen_helper_rebuild_hflags_a32_newel(cpu_env);
6379 } else {
6380 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
6383 tcg_temp_free_i32(tcg_el);
6384 /*
6385 * We default to ending the TB on a coprocessor register write,
6386 * but allow this to be suppressed by the register definition
6387 * (usually only necessary to work around guest bugs).
6388 */
6389 need_exit_tb = true;
6391 if (need_exit_tb) {
6392 gen_lookup_tb(s);
6395 return 0;
6398 /* Unknown register; this might be a guest error or a QEMU
6399 * unimplemented feature.
6400 */
6401 if (is64) {
6402 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
6403 "64 bit system register cp:%d opc1: %d crm:%d "
6404 "(%s)\n",
6405 isread ? "read" : "write", cpnum, opc1, crm,
6406 s->ns ? "non-secure" : "secure");
6407 } else {
6408 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
6409 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
6410 "(%s)\n",
6411 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
6412 s->ns ? "non-secure" : "secure");
6415 return 1;
6419 /* Store a 64-bit value to a register pair. Clobbers val. */
6420 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
6422 TCGv_i32 tmp;
6423 tmp = tcg_temp_new_i32();
6424 tcg_gen_extrl_i64_i32(tmp, val);
6425 store_reg(s, rlow, tmp);
6426 tmp = tcg_temp_new_i32();
6427 tcg_gen_extrh_i64_i32(tmp, val);
6428 store_reg(s, rhigh, tmp);
6431 /* load and add a 64-bit value from a register pair. */
6432 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
6434 TCGv_i64 tmp;
6435 TCGv_i32 tmpl;
6436 TCGv_i32 tmph;
6438 /* Load the 64-bit value rhigh:rlow. */
6439 tmpl = load_reg(s, rlow);
6440 tmph = load_reg(s, rhigh);
6441 tmp = tcg_temp_new_i64();
6442 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
6443 tcg_temp_free_i32(tmpl);
6444 tcg_temp_free_i32(tmph);
6445 tcg_gen_add_i64(val, val, tmp);
6446 tcg_temp_free_i64(tmp);
6449 /* Set N and Z flags from hi|lo. */
6450 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
6452 tcg_gen_mov_i32(cpu_NF, hi);
6453 tcg_gen_or_i32(cpu_ZF, lo, hi);
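/*
 * Note on the flag representation used above: cpu_ZF holds a value that is
 * zero exactly when Z is set, so ORing both halves makes Z reflect the full
 * 64-bit result; cpu_NF takes the sign from bit 31 of the high half.
 */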
6456 /* Load/Store exclusive instructions are implemented by remembering
6457 the value/address loaded, and seeing if these are the same
6458 when the store is performed. This should be sufficient to implement
6459 the architecturally mandated semantics, and avoids having to monitor
6460 regular stores. The compare vs the remembered value is done during
6461 the cmpxchg operation, but we must compare the addresses manually. */
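/*
 * Illustrative sketch of the scheme above for a 32-bit LDREX/STREX pair
 * (intended semantics, not the generated TCG):
 *
 *   LDREX:  exclusive_addr = addr; exclusive_val = [addr]; Rt = [addr];
 *   STREX:  if (addr == exclusive_addr
 *               && cmpxchg([addr], exclusive_val, Rt) succeeded) {
 *               Rd = 0;
 *           } else {
 *               Rd = 1;
 *           }
 *           exclusive_addr = -1;   (monitor cleared either way)
 */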
6462 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
6463 TCGv_i32 addr, int size)
6465 TCGv_i32 tmp = tcg_temp_new_i32();
6466 MemOp opc = size | MO_ALIGN | s->be_data;
6468 s->is_ldex = true;
6470 if (size == 3) {
6471 TCGv_i32 tmp2 = tcg_temp_new_i32();
6472 TCGv_i64 t64 = tcg_temp_new_i64();
6474 /* For AArch32, architecturally the 32-bit word at the lowest
6475 * address is always Rt and the one at addr+4 is Rt2, even if
6476 * the CPU is big-endian. That means we don't want to do a
6477 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
6478 * for an architecturally 64-bit access, but instead do a
6479 * 64-bit access using MO_BE if appropriate and then split
6480 * the two halves.
6481 * This only makes a difference for BE32 user-mode, where
6482 * frob64() must not flip the two halves of the 64-bit data
6483 * but this code must treat BE32 user-mode like BE32 system.
6484 */
6485 TCGv taddr = gen_aa32_addr(s, addr, opc);
6487 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
6488 tcg_temp_free(taddr);
6489 tcg_gen_mov_i64(cpu_exclusive_val, t64);
6490 if (s->be_data == MO_BE) {
6491 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
6492 } else {
6493 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
6495 tcg_temp_free_i64(t64);
6497 store_reg(s, rt2, tmp2);
6498 } else {
6499 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
6500 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
6503 store_reg(s, rt, tmp);
6504 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
6507 static void gen_clrex(DisasContext *s)
6509 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
6512 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
6513 TCGv_i32 addr, int size)
6515 TCGv_i32 t0, t1, t2;
6516 TCGv_i64 extaddr;
6517 TCGv taddr;
6518 TCGLabel *done_label;
6519 TCGLabel *fail_label;
6520 MemOp opc = size | MO_ALIGN | s->be_data;
6522 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
6523 [addr] = {Rt};
6524 {Rd} = 0;
6525 } else {
6526 {Rd} = 1;
6527 } */
6528 fail_label = gen_new_label();
6529 done_label = gen_new_label();
6530 extaddr = tcg_temp_new_i64();
6531 tcg_gen_extu_i32_i64(extaddr, addr);
6532 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
6533 tcg_temp_free_i64(extaddr);
6535 taddr = gen_aa32_addr(s, addr, opc);
6536 t0 = tcg_temp_new_i32();
6537 t1 = load_reg(s, rt);
6538 if (size == 3) {
6539 TCGv_i64 o64 = tcg_temp_new_i64();
6540 TCGv_i64 n64 = tcg_temp_new_i64();
6542 t2 = load_reg(s, rt2);
6543 /* For AArch32, architecturally the 32-bit word at the lowest
6544 * address is always Rt and the one at addr+4 is Rt2, even if
6545 * the CPU is big-endian. Since we're going to treat this as a
6546 * single 64-bit BE store, we need to put the two halves in the
6547 * opposite order for BE to LE, so that they end up in the right
6548 * places.
6549 * We don't want gen_aa32_frob64() because that does the wrong
6550 * thing for BE32 usermode.
6551 */
6552 if (s->be_data == MO_BE) {
6553 tcg_gen_concat_i32_i64(n64, t2, t1);
6554 } else {
6555 tcg_gen_concat_i32_i64(n64, t1, t2);
6557 tcg_temp_free_i32(t2);
6559 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
6560 get_mem_index(s), opc);
6561 tcg_temp_free_i64(n64);
6563 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
6564 tcg_gen_extrl_i64_i32(t0, o64);
6566 tcg_temp_free_i64(o64);
6567 } else {
6568 t2 = tcg_temp_new_i32();
6569 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
6570 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
6571 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
6572 tcg_temp_free_i32(t2);
6574 tcg_temp_free_i32(t1);
6575 tcg_temp_free(taddr);
6576 tcg_gen_mov_i32(cpu_R[rd], t0);
6577 tcg_temp_free_i32(t0);
6578 tcg_gen_br(done_label);
6580 gen_set_label(fail_label);
6581 tcg_gen_movi_i32(cpu_R[rd], 1);
6582 gen_set_label(done_label);
6583 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
6586 /* gen_srs:
6587 * @env: CPUARMState
6588 * @s: DisasContext
6589 * @mode: mode field from insn (which stack to store to)
6590 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
6591 * @writeback: true if writeback bit set
6593 * Generate code for the SRS (Store Return State) insn.
6594 */
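/*
 * Summary of the addresses generated below (SP is the banked r13 selected
 * by 'mode'): DA stores LR at SP-4 and the SPSR at SP; IA at SP and SP+4;
 * DB at SP-8 and SP-4; IB at SP+4 and SP+8. With writeback the final SP
 * is SP-8 for DA/DB and SP+8 for IA/IB.
 */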
6595 static void gen_srs(DisasContext *s,
6596 uint32_t mode, uint32_t amode, bool writeback)
6598 int32_t offset;
6599 TCGv_i32 addr, tmp;
6600 bool undef = false;
6602 /* SRS is:
6603 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
6604 * and specified mode is monitor mode
6605 * - UNDEFINED in Hyp mode
6606 * - UNPREDICTABLE in User or System mode
6607 * - UNPREDICTABLE if the specified mode is:
6608 * -- not implemented
6609 * -- not a valid mode number
6610 * -- a mode that's at a higher exception level
6611 * -- Monitor, if we are Non-secure
6612 * For the UNPREDICTABLE cases we choose to UNDEF.
6613 */
6614 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
6615 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
6616 return;
6619 if (s->current_el == 0 || s->current_el == 2) {
6620 undef = true;
6623 switch (mode) {
6624 case ARM_CPU_MODE_USR:
6625 case ARM_CPU_MODE_FIQ:
6626 case ARM_CPU_MODE_IRQ:
6627 case ARM_CPU_MODE_SVC:
6628 case ARM_CPU_MODE_ABT:
6629 case ARM_CPU_MODE_UND:
6630 case ARM_CPU_MODE_SYS:
6631 break;
6632 case ARM_CPU_MODE_HYP:
6633 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
6634 undef = true;
6636 break;
6637 case ARM_CPU_MODE_MON:
6638 /* No need to check specifically for "are we non-secure" because
6639 * we've already made EL0 UNDEF and handled the trap for S-EL1;
6640 * so if this isn't EL3 then we must be non-secure.
6641 */
6642 if (s->current_el != 3) {
6643 undef = true;
6645 break;
6646 default:
6647 undef = true;
6650 if (undef) {
6651 unallocated_encoding(s);
6652 return;
6655 addr = tcg_temp_new_i32();
6656 tmp = tcg_const_i32(mode);
6657 /* get_r13_banked() will raise an exception if called from System mode */
6658 gen_set_condexec(s);
6659 gen_set_pc_im(s, s->pc_curr);
6660 gen_helper_get_r13_banked(addr, cpu_env, tmp);
6661 tcg_temp_free_i32(tmp);
6662 switch (amode) {
6663 case 0: /* DA */
6664 offset = -4;
6665 break;
6666 case 1: /* IA */
6667 offset = 0;
6668 break;
6669 case 2: /* DB */
6670 offset = -8;
6671 break;
6672 case 3: /* IB */
6673 offset = 4;
6674 break;
6675 default:
6676 abort();
6678 tcg_gen_addi_i32(addr, addr, offset);
6679 tmp = load_reg(s, 14);
6680 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
6681 tcg_temp_free_i32(tmp);
6682 tmp = load_cpu_field(spsr);
6683 tcg_gen_addi_i32(addr, addr, 4);
6684 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
6685 tcg_temp_free_i32(tmp);
6686 if (writeback) {
6687 switch (amode) {
6688 case 0:
6689 offset = -8;
6690 break;
6691 case 1:
6692 offset = 4;
6693 break;
6694 case 2:
6695 offset = -4;
6696 break;
6697 case 3:
6698 offset = 0;
6699 break;
6700 default:
6701 abort();
6703 tcg_gen_addi_i32(addr, addr, offset);
6704 tmp = tcg_const_i32(mode);
6705 gen_helper_set_r13_banked(cpu_env, tmp, addr);
6706 tcg_temp_free_i32(tmp);
6708 tcg_temp_free_i32(addr);
6709 s->base.is_jmp = DISAS_UPDATE;
6712 /* Generate a label used for skipping this instruction */
6713 static void arm_gen_condlabel(DisasContext *s)
6715 if (!s->condjmp) {
6716 s->condlabel = gen_new_label();
6717 s->condjmp = 1;
6721 /* Skip this instruction if the ARM condition is false */
6722 static void arm_skip_unless(DisasContext *s, uint32_t cond)
6724 arm_gen_condlabel(s);
6725 arm_gen_test_cc(cond ^ 1, s->condlabel);
6730 * Constant expanders for the decoders.
6733 static int negate(DisasContext *s, int x)
6735 return -x;
6738 static int plus_2(DisasContext *s, int x)
6740 return x + 2;
6743 static int times_2(DisasContext *s, int x)
6745 return x * 2;
6748 static int times_4(DisasContext *s, int x)
6750 return x * 4;
6753 /* Return only the rotation part of T32ExpandImm. */
6754 static int t32_expandimm_rot(DisasContext *s, int x)
6756 return x & 0xc00 ? extract32(x, 7, 5) : 0;
6759 /* Return the unrotated immediate from T32ExpandImm. */
6760 static int t32_expandimm_imm(DisasContext *s, int x)
6762 int imm = extract32(x, 0, 8);
6764 switch (extract32(x, 8, 4)) {
6765 case 0: /* XY */
6766 /* Nothing to do. */
6767 break;
6768 case 1: /* 00XY00XY */
6769 imm *= 0x00010001;
6770 break;
6771 case 2: /* XY00XY00 */
6772 imm *= 0x01000100;
6773 break;
6774 case 3: /* XYXYXYXY */
6775 imm *= 0x01010101;
6776 break;
6777 default:
6778 /* Rotated constant. */
6779 imm |= 0x80;
6780 break;
6782 return imm;
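/*
 * Worked example of the two expanders above (the rotation is applied
 * later, by ror32() in op_s_rri_rot()/op_s_rxi_rot()):
 *   imm12 = 0x14a: rot = 0,  imm = 0x4a * 0x00010001          -> 0x004a004a
 *   imm12 = 0x92b: rot = 18, imm = 0x2b | 0x80 = 0xab;
 *                  ror32(0xab, 18) gives the final constant 0x002ac000
 */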
6785 static int t32_branch24(DisasContext *s, int x)
6787 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
6788 x ^= !(x < 0) * (3 << 21);
6789 /* Append the final zero. */
6790 return x << 1;
6793 static int t16_setflags(DisasContext *s)
6795 return s->condexec_mask == 0;
6798 static int t16_push_list(DisasContext *s, int x)
6800 return (x & 0xff) | (x & 0x100) << (14 - 8);
6803 static int t16_pop_list(DisasContext *s, int x)
6805 return (x & 0xff) | (x & 0x100) << (15 - 8);
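/*
 * Example: a register_list of 0x101 (r0 plus the extra bit 8) expands to
 * 0x4001 ({r0, lr}) for PUSH and 0x8001 ({r0, pc}) for POP.
 */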
6808 /*
6809 * Include the generated decoders.
6810 */
6812 #include "decode-a32.inc.c"
6813 #include "decode-a32-uncond.inc.c"
6814 #include "decode-t32.inc.c"
6815 #include "decode-t16.inc.c"
6817 /* Helpers to swap operands for reverse-subtract. */
6818 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
6820 tcg_gen_sub_i32(dst, b, a);
6823 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
6825 gen_sub_CC(dst, b, a);
6828 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
6830 gen_sub_carry(dest, b, a);
6833 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
6835 gen_sbc_CC(dest, b, a);
6838 /*
6839 * Helpers for the data processing routines.
6840 *
6841 * After the computation store the results back.
6842 * This may be suppressed altogether (STREG_NONE), require a runtime
6843 * check against the stack limits (STREG_SP_CHECK), or generate an
6844 * exception return. Oh, or store into a register.
6845 *
6846 * Always return true, indicating success for a trans_* function.
6847 */
6848 typedef enum {
6849 STREG_NONE,
6850 STREG_NORMAL,
6851 STREG_SP_CHECK,
6852 STREG_EXC_RET,
6853 } StoreRegKind;
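/*
 * For example, the ADD expansion below uses STREG_SP_CHECK when both Rd
 * and Rn are SP (so the v8M stack-limit check runs), SUB/MOV switch to
 * STREG_EXC_RET for the "SUBS PC, LR"-style exception-return forms, and
 * the comparison ops (TST/TEQ/CMN/CMP) discard the result via STREG_NONE.
 */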
6855 static bool store_reg_kind(DisasContext *s, int rd,
6856 TCGv_i32 val, StoreRegKind kind)
6858 switch (kind) {
6859 case STREG_NONE:
6860 tcg_temp_free_i32(val);
6861 return true;
6862 case STREG_NORMAL:
6863 /* See ALUWritePC: Interworking only from a32 mode. */
6864 if (s->thumb) {
6865 store_reg(s, rd, val);
6866 } else {
6867 store_reg_bx(s, rd, val);
6869 return true;
6870 case STREG_SP_CHECK:
6871 store_sp_checked(s, val);
6872 return true;
6873 case STREG_EXC_RET:
6874 gen_exception_return(s, val);
6875 return true;
6877 g_assert_not_reached();
6881 * Data Processing (register)
6883 * Operate, with set flags, one register source,
6884 * one immediate shifted register source, and a destination.
6886 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
6887 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6888 int logic_cc, StoreRegKind kind)
6890 TCGv_i32 tmp1, tmp2;
6892 tmp2 = load_reg(s, a->rm);
6893 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
6894 tmp1 = load_reg(s, a->rn);
6896 gen(tmp1, tmp1, tmp2);
6897 tcg_temp_free_i32(tmp2);
6899 if (logic_cc) {
6900 gen_logic_CC(tmp1);
6902 return store_reg_kind(s, a->rd, tmp1, kind);
6905 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
6906 void (*gen)(TCGv_i32, TCGv_i32),
6907 int logic_cc, StoreRegKind kind)
6909 TCGv_i32 tmp;
6911 tmp = load_reg(s, a->rm);
6912 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
6914 gen(tmp, tmp);
6915 if (logic_cc) {
6916 gen_logic_CC(tmp);
6918 return store_reg_kind(s, a->rd, tmp, kind);
6922 * Data-processing (register-shifted register)
6924 * Operate, with set flags, one register source,
6925 * one register shifted register source, and a destination.
6927 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
6928 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6929 int logic_cc, StoreRegKind kind)
6931 TCGv_i32 tmp1, tmp2;
6933 tmp1 = load_reg(s, a->rs);
6934 tmp2 = load_reg(s, a->rm);
6935 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6936 tmp1 = load_reg(s, a->rn);
6938 gen(tmp1, tmp1, tmp2);
6939 tcg_temp_free_i32(tmp2);
6941 if (logic_cc) {
6942 gen_logic_CC(tmp1);
6944 return store_reg_kind(s, a->rd, tmp1, kind);
6947 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
6948 void (*gen)(TCGv_i32, TCGv_i32),
6949 int logic_cc, StoreRegKind kind)
6951 TCGv_i32 tmp1, tmp2;
6953 tmp1 = load_reg(s, a->rs);
6954 tmp2 = load_reg(s, a->rm);
6955 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6957 gen(tmp2, tmp2);
6958 if (logic_cc) {
6959 gen_logic_CC(tmp2);
6961 return store_reg_kind(s, a->rd, tmp2, kind);
6964 /*
6965 * Data-processing (immediate)
6967 * Operate, with set flags, one register source,
6968 * one rotated immediate, and a destination.
6970 * Note that logic_cc && a->rot setting CF based on the msb of the
6971 * immediate is the reason why we must pass in the unrotated form
6972 * of the immediate.
6973 */
6974 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
6975 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6976 int logic_cc, StoreRegKind kind)
6978 TCGv_i32 tmp1, tmp2;
6979 uint32_t imm;
6981 imm = ror32(a->imm, a->rot);
6982 if (logic_cc && a->rot) {
6983 tcg_gen_movi_i32(cpu_CF, imm >> 31);
6985 tmp2 = tcg_const_i32(imm);
6986 tmp1 = load_reg(s, a->rn);
6988 gen(tmp1, tmp1, tmp2);
6989 tcg_temp_free_i32(tmp2);
6991 if (logic_cc) {
6992 gen_logic_CC(tmp1);
6994 return store_reg_kind(s, a->rd, tmp1, kind);
6997 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
6998 void (*gen)(TCGv_i32, TCGv_i32),
6999 int logic_cc, StoreRegKind kind)
7001 TCGv_i32 tmp;
7002 uint32_t imm;
7004 imm = ror32(a->imm, a->rot);
7005 if (logic_cc && a->rot) {
7006 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7008 tmp = tcg_const_i32(imm);
7010 gen(tmp, tmp);
7011 if (logic_cc) {
7012 gen_logic_CC(tmp);
7014 return store_reg_kind(s, a->rd, tmp, kind);
7017 #define DO_ANY3(NAME, OP, L, K) \
7018 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7019 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7020 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7021 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7022 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7023 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7025 #define DO_ANY2(NAME, OP, L, K) \
7026 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7027 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7028 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7029 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7030 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7031 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7033 #define DO_CMP2(NAME, OP, L) \
7034 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7035 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7036 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7037 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7038 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7039 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
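/*
 * For example, DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) expands
 * to trans_AND_rrri, trans_AND_rrrr and trans_AND_rri, each forwarding to
 * the matching op_s_* helper above with tcg_gen_and_i32 as the operation.
 */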
7041 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7042 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7043 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7044 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7046 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7047 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7048 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7049 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7051 DO_CMP2(TST, tcg_gen_and_i32, true)
7052 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7053 DO_CMP2(CMN, gen_add_CC, false)
7054 DO_CMP2(CMP, gen_sub_CC, false)
7056 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7057 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7059 /*
7060 * Note for the computation of StoreRegKind we return out of the
7061 * middle of the functions that are expanded by DO_ANY3, and that
7062 * we modify a->s via that parameter before it is used by OP.
7063 */
7064 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7066 StoreRegKind ret = STREG_NORMAL;
7067 if (a->rd == 15 && a->s) {
7069 * See ALUExceptionReturn:
7070 * In User mode, UNPREDICTABLE; we choose UNDEF.
7071 * In Hyp mode, UNDEFINED.
7073 if (IS_USER(s) || s->current_el == 2) {
7074 unallocated_encoding(s);
7075 return true;
7077 /* There is no writeback of nzcv to PSTATE. */
7078 a->s = 0;
7079 ret = STREG_EXC_RET;
7080 } else if (a->rd == 13 && a->rn == 13) {
7081 ret = STREG_SP_CHECK;
7083 ret;
7086 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7088 StoreRegKind ret = STREG_NORMAL;
7089 if (a->rd == 15 && a->s) {
7091 * See ALUExceptionReturn:
7092 * In User mode, UNPREDICTABLE; we choose UNDEF.
7093 * In Hyp mode, UNDEFINED.
7095 if (IS_USER(s) || s->current_el == 2) {
7096 unallocated_encoding(s);
7097 return true;
7099 /* There is no writeback of nzcv to PSTATE. */
7100 a->s = 0;
7101 ret = STREG_EXC_RET;
7102 } else if (a->rd == 13) {
7103 ret = STREG_SP_CHECK;
7105 ret;
7108 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7111 * ORN is only available with T32, so there is no register-shifted-register
7112 * form of the insn. Using the DO_ANY3 macro would create an unused function.
7114 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7116 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7119 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7121 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7124 #undef DO_ANY3
7125 #undef DO_ANY2
7126 #undef DO_CMP2
7128 static bool trans_ADR(DisasContext *s, arg_ri *a)
7130 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7131 return true;
7134 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7136 TCGv_i32 tmp;
7138 if (!ENABLE_ARCH_6T2) {
7139 return false;
7142 tmp = tcg_const_i32(a->imm);
7143 store_reg(s, a->rd, tmp);
7144 return true;
7147 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7149 TCGv_i32 tmp;
7151 if (!ENABLE_ARCH_6T2) {
7152 return false;
7155 tmp = load_reg(s, a->rd);
7156 tcg_gen_ext16u_i32(tmp, tmp);
7157 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7158 store_reg(s, a->rd, tmp);
7159 return true;
7163 * Multiply and multiply accumulate
7166 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7168 TCGv_i32 t1, t2;
7170 t1 = load_reg(s, a->rn);
7171 t2 = load_reg(s, a->rm);
7172 tcg_gen_mul_i32(t1, t1, t2);
7173 tcg_temp_free_i32(t2);
7174 if (add) {
7175 t2 = load_reg(s, a->ra);
7176 tcg_gen_add_i32(t1, t1, t2);
7177 tcg_temp_free_i32(t2);
7179 if (a->s) {
7180 gen_logic_CC(t1);
7182 store_reg(s, a->rd, t1);
7183 return true;
7186 static bool trans_MUL(DisasContext *s, arg_MUL *a)
7188 return op_mla(s, a, false);
7191 static bool trans_MLA(DisasContext *s, arg_MLA *a)
7193 return op_mla(s, a, true);
7196 static bool trans_MLS(DisasContext *s, arg_MLS *a)
7198 TCGv_i32 t1, t2;
7200 if (!ENABLE_ARCH_6T2) {
7201 return false;
7203 t1 = load_reg(s, a->rn);
7204 t2 = load_reg(s, a->rm);
7205 tcg_gen_mul_i32(t1, t1, t2);
7206 tcg_temp_free_i32(t2);
7207 t2 = load_reg(s, a->ra);
7208 tcg_gen_sub_i32(t1, t2, t1);
7209 tcg_temp_free_i32(t2);
7210 store_reg(s, a->rd, t1);
7211 return true;
7214 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7216 TCGv_i32 t0, t1, t2, t3;
7218 t0 = load_reg(s, a->rm);
7219 t1 = load_reg(s, a->rn);
7220 if (uns) {
7221 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7222 } else {
7223 tcg_gen_muls2_i32(t0, t1, t0, t1);
7225 if (add) {
7226 t2 = load_reg(s, a->ra);
7227 t3 = load_reg(s, a->rd);
7228 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7229 tcg_temp_free_i32(t2);
7230 tcg_temp_free_i32(t3);
7232 if (a->s) {
7233 gen_logicq_cc(t0, t1);
7235 store_reg(s, a->ra, t0);
7236 store_reg(s, a->rd, t1);
7237 return true;
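/*
 * t0/t1 above are the low/high halves of the product; the decode supplies
 * RdLo in a->ra and RdHi in a->rd, hence the two store_reg() calls.
 */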
7240 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7242 return op_mlal(s, a, true, false);
7245 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
7247 return op_mlal(s, a, false, false);
7250 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
7252 return op_mlal(s, a, true, true);
7255 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
7257 return op_mlal(s, a, false, true);
7260 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
7262 TCGv_i32 t0, t1, t2, zero;
7264 if (s->thumb
7265 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7266 : !ENABLE_ARCH_6) {
7267 return false;
7270 t0 = load_reg(s, a->rm);
7271 t1 = load_reg(s, a->rn);
7272 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7273 zero = tcg_const_i32(0);
7274 t2 = load_reg(s, a->ra);
7275 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7276 tcg_temp_free_i32(t2);
7277 t2 = load_reg(s, a->rd);
7278 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7279 tcg_temp_free_i32(t2);
7280 tcg_temp_free_i32(zero);
7281 store_reg(s, a->ra, t0);
7282 store_reg(s, a->rd, t1);
7283 return true;
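/*
 * UMAAL computes Rm*Rn + Ra + Rd as a 64-bit value; since
 * 0xffffffff * 0xffffffff + 2 * 0xffffffff == 0xffffffffffffffff,
 * the two add2 steps above can never carry out of 64 bits.
 */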
7287 * Saturating addition and subtraction
7290 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
7292 TCGv_i32 t0, t1;
7294 if (s->thumb
7295 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7296 : !ENABLE_ARCH_5TE) {
7297 return false;
7300 t0 = load_reg(s, a->rm);
7301 t1 = load_reg(s, a->rn);
7302 if (doub) {
7303 gen_helper_add_saturate(t1, cpu_env, t1, t1);
7305 if (add) {
7306 gen_helper_add_saturate(t0, cpu_env, t0, t1);
7307 } else {
7308 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
7310 tcg_temp_free_i32(t1);
7311 store_reg(s, a->rd, t0);
7312 return true;
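/*
 * For QDADD/QDSUB the doubling of Rn is itself done with the saturating
 * helper, so either the doubling or the final add/subtract can set Q.
 */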
7315 #define DO_QADDSUB(NAME, ADD, DOUB) \
7316 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7318 return op_qaddsub(s, a, ADD, DOUB); \
7321 DO_QADDSUB(QADD, true, false)
7322 DO_QADDSUB(QSUB, false, false)
7323 DO_QADDSUB(QDADD, true, true)
7324 DO_QADDSUB(QDSUB, false, true)
7326 #undef DO_QADDSUB
7329 * Halfword multiply and multiply accumulate
7332 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
7333 int add_long, bool nt, bool mt)
7335 TCGv_i32 t0, t1, tl, th;
7337 if (s->thumb
7338 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7339 : !ENABLE_ARCH_5TE) {
7340 return false;
7343 t0 = load_reg(s, a->rn);
7344 t1 = load_reg(s, a->rm);
7345 gen_mulxy(t0, t1, nt, mt);
7346 tcg_temp_free_i32(t1);
7348 switch (add_long) {
7349 case 0:
7350 store_reg(s, a->rd, t0);
7351 break;
7352 case 1:
7353 t1 = load_reg(s, a->ra);
7354 gen_helper_add_setq(t0, cpu_env, t0, t1);
7355 tcg_temp_free_i32(t1);
7356 store_reg(s, a->rd, t0);
7357 break;
7358 case 2:
7359 tl = load_reg(s, a->ra);
7360 th = load_reg(s, a->rd);
7361 /* Sign-extend the 32-bit product to 64 bits. */
7362 t1 = tcg_temp_new_i32();
7363 tcg_gen_sari_i32(t1, t0, 31);
7364 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
7365 tcg_temp_free_i32(t0);
7366 tcg_temp_free_i32(t1);
7367 store_reg(s, a->ra, tl);
7368 store_reg(s, a->rd, th);
7369 break;
7370 default:
7371 g_assert_not_reached();
7373 return true;
7376 #define DO_SMLAX(NAME, add, nt, mt) \
7377 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
7379 return op_smlaxxx(s, a, add, nt, mt); \
7382 DO_SMLAX(SMULBB, 0, 0, 0)
7383 DO_SMLAX(SMULBT, 0, 0, 1)
7384 DO_SMLAX(SMULTB, 0, 1, 0)
7385 DO_SMLAX(SMULTT, 0, 1, 1)
7387 DO_SMLAX(SMLABB, 1, 0, 0)
7388 DO_SMLAX(SMLABT, 1, 0, 1)
7389 DO_SMLAX(SMLATB, 1, 1, 0)
7390 DO_SMLAX(SMLATT, 1, 1, 1)
7392 DO_SMLAX(SMLALBB, 2, 0, 0)
7393 DO_SMLAX(SMLALBT, 2, 0, 1)
7394 DO_SMLAX(SMLALTB, 2, 1, 0)
7395 DO_SMLAX(SMLALTT, 2, 1, 1)
7397 #undef DO_SMLAX
7399 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
7401 TCGv_i32 t0, t1;
7403 if (!ENABLE_ARCH_5TE) {
7404 return false;
7407 t0 = load_reg(s, a->rn);
7408 t1 = load_reg(s, a->rm);
7409 /*
7410 * Since the nominal result is product<47:16>, shift the 16-bit
7411 * input up by 16 bits, so that the result is at product<63:32>.
7412 */
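/*
 * Concretely: muls2 below leaves the full 64-bit product in t0 (low) and
 * t1 (high); with the 16-bit operand pre-shifted into the top half, t1 is
 * exactly (Rn * Rm<15:0>)<47:16>, so only t1 is kept.
 */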
7413 if (mt) {
7414 tcg_gen_andi_i32(t1, t1, 0xffff0000);
7415 } else {
7416 tcg_gen_shli_i32(t1, t1, 16);
7418 tcg_gen_muls2_i32(t0, t1, t0, t1);
7419 tcg_temp_free_i32(t0);
7420 if (add) {
7421 t0 = load_reg(s, a->ra);
7422 gen_helper_add_setq(t1, cpu_env, t1, t0);
7423 tcg_temp_free_i32(t0);
7425 store_reg(s, a->rd, t1);
7426 return true;
7429 #define DO_SMLAWX(NAME, add, mt) \
7430 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
7432 return op_smlawx(s, a, add, mt); \
7435 DO_SMLAWX(SMULWB, 0, 0)
7436 DO_SMLAWX(SMULWT, 0, 1)
7437 DO_SMLAWX(SMLAWB, 1, 0)
7438 DO_SMLAWX(SMLAWT, 1, 1)
7440 #undef DO_SMLAWX
7443 * MSR (immediate) and hints
7446 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
7448 /*
7449 * When running single-threaded TCG code, use the helper to ensure that
7450 * the next round-robin scheduled vCPU gets a crack. When running in
7451 * MTTCG we don't generate jumps to the helper as it won't affect the
7452 * scheduling of other vCPUs.
7453 */
7454 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
7455 gen_set_pc_im(s, s->base.pc_next);
7456 s->base.is_jmp = DISAS_YIELD;
7458 return true;
7461 static bool trans_WFE(DisasContext *s, arg_WFE *a)
7463 /*
7464 * When running single-threaded TCG code, use the helper to ensure that
7465 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
7466 * just skip this instruction. Currently the SEV/SEVL instructions,
7467 * which are *one* of many ways to wake the CPU from WFE, are not
7468 * implemented so we can't sleep like WFI does.
7469 */
7470 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
7471 gen_set_pc_im(s, s->base.pc_next);
7472 s->base.is_jmp = DISAS_WFE;
7474 return true;
7477 static bool trans_WFI(DisasContext *s, arg_WFI *a)
7479 /* For WFI, halt the vCPU until an IRQ. */
7480 gen_set_pc_im(s, s->base.pc_next);
7481 s->base.is_jmp = DISAS_WFI;
7482 return true;
7485 static bool trans_NOP(DisasContext *s, arg_NOP *a)
7487 return true;
7490 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
7492 uint32_t val = ror32(a->imm, a->rot * 2);
7493 uint32_t mask = msr_mask(s, a->mask, a->r);
7495 if (gen_set_psr_im(s, mask, a->r, val)) {
7496 unallocated_encoding(s);
7498 return true;
7502 * Cyclic Redundancy Check
7505 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
7507 TCGv_i32 t1, t2, t3;
7509 if (!dc_isar_feature(aa32_crc32, s)) {
7510 return false;
7513 t1 = load_reg(s, a->rn);
7514 t2 = load_reg(s, a->rm);
7515 switch (sz) {
7516 case MO_8:
7517 gen_uxtb(t2);
7518 break;
7519 case MO_16:
7520 gen_uxth(t2);
7521 break;
7522 case MO_32:
7523 break;
7524 default:
7525 g_assert_not_reached();
7527 t3 = tcg_const_i32(1 << sz);
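/* The third helper operand is the operand width in bytes (1, 2 or 4),
 * i.e. how much of t2 the crc32/crc32c helper consumes. */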
7528 if (c) {
7529 gen_helper_crc32c(t1, t1, t2, t3);
7530 } else {
7531 gen_helper_crc32(t1, t1, t2, t3);
7533 tcg_temp_free_i32(t2);
7534 tcg_temp_free_i32(t3);
7535 store_reg(s, a->rd, t1);
7536 return true;
7539 #define DO_CRC32(NAME, c, sz) \
7540 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7541 { return op_crc32(s, a, c, sz); }
7543 DO_CRC32(CRC32B, false, MO_8)
7544 DO_CRC32(CRC32H, false, MO_16)
7545 DO_CRC32(CRC32W, false, MO_32)
7546 DO_CRC32(CRC32CB, true, MO_8)
7547 DO_CRC32(CRC32CH, true, MO_16)
7548 DO_CRC32(CRC32CW, true, MO_32)
7550 #undef DO_CRC32
7553 * Miscellaneous instructions
7556 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
7558 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7559 return false;
7561 gen_mrs_banked(s, a->r, a->sysm, a->rd);
7562 return true;
7565 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
7567 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7568 return false;
7570 gen_msr_banked(s, a->r, a->sysm, a->rn);
7571 return true;
7574 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
7576 TCGv_i32 tmp;
7578 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7579 return false;
7581 if (a->r) {
7582 if (IS_USER(s)) {
7583 unallocated_encoding(s);
7584 return true;
7586 tmp = load_cpu_field(spsr);
7587 } else {
7588 tmp = tcg_temp_new_i32();
7589 gen_helper_cpsr_read(tmp, cpu_env);
7591 store_reg(s, a->rd, tmp);
7592 return true;
7595 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
7597 TCGv_i32 tmp;
7598 uint32_t mask = msr_mask(s, a->mask, a->r);
7600 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7601 return false;
7603 tmp = load_reg(s, a->rn);
7604 if (gen_set_psr(s, mask, a->r, tmp)) {
7605 unallocated_encoding(s);
7607 return true;
7610 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
7612 TCGv_i32 tmp;
7614 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
7615 return false;
7617 tmp = tcg_const_i32(a->sysm);
7618 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
7619 store_reg(s, a->rd, tmp);
7620 return true;
7623 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
7625 TCGv_i32 addr, reg;
7627 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
7628 return false;
7630 addr = tcg_const_i32((a->mask << 10) | a->sysm);
7631 reg = load_reg(s, a->rn);
7632 gen_helper_v7m_msr(cpu_env, addr, reg);
7633 tcg_temp_free_i32(addr);
7634 tcg_temp_free_i32(reg);
7635 /* If we wrote to CONTROL, the EL might have changed */
7636 gen_helper_rebuild_hflags_m32_newel(cpu_env);
7637 gen_lookup_tb(s);
7638 return true;
7641 static bool trans_BX(DisasContext *s, arg_BX *a)
7643 if (!ENABLE_ARCH_4T) {
7644 return false;
7646 gen_bx_excret(s, load_reg(s, a->rm));
7647 return true;
7650 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
7652 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
7653 return false;
7655 /* Trivial implementation equivalent to bx. */
7656 gen_bx(s, load_reg(s, a->rm));
7657 return true;
7660 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
7662 TCGv_i32 tmp;
7664 if (!ENABLE_ARCH_5) {
7665 return false;
7667 tmp = load_reg(s, a->rm);
7668 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7669 gen_bx(s, tmp);
7670 return true;
7673 /*
7674 * BXNS/BLXNS: only exist for v8M with the security extensions,
7675 * and always UNDEF if NonSecure. We don't implement these in
7676 * the user-only mode either (in theory you can use them from
7677 * Secure User mode but they are too tied in to system emulation).
7678 */
7679 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
7681 if (!s->v8m_secure || IS_USER_ONLY) {
7682 unallocated_encoding(s);
7683 } else {
7684 gen_bxns(s, a->rm);
7686 return true;
7689 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
7691 if (!s->v8m_secure || IS_USER_ONLY) {
7692 unallocated_encoding(s);
7693 } else {
7694 gen_blxns(s, a->rm);
7696 return true;
7699 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
7701 TCGv_i32 tmp;
7703 if (!ENABLE_ARCH_5) {
7704 return false;
7706 tmp = load_reg(s, a->rm);
7707 tcg_gen_clzi_i32(tmp, tmp, 32);
7708 store_reg(s, a->rd, tmp);
7709 return true;
7712 static bool trans_ERET(DisasContext *s, arg_ERET *a)
7714 TCGv_i32 tmp;
7716 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
7717 return false;
7719 if (IS_USER(s)) {
7720 unallocated_encoding(s);
7721 return true;
7723 if (s->current_el == 2) {
7724 /* ERET from Hyp uses ELR_Hyp, not LR */
7725 tmp = load_cpu_field(elr_el[2]);
7726 } else {
7727 tmp = load_reg(s, 14);
7729 gen_exception_return(s, tmp);
7730 return true;
7733 static bool trans_HLT(DisasContext *s, arg_HLT *a)
7735 gen_hlt(s, a->imm);
7736 return true;
7739 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
7741 if (!ENABLE_ARCH_5) {
7742 return false;
7744 if (arm_dc_feature(s, ARM_FEATURE_M) &&
7745 semihosting_enabled() &&
7746 #ifndef CONFIG_USER_ONLY
7747 !IS_USER(s) &&
7748 #endif
7749 (a->imm == 0xab)) {
7750 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
7751 } else {
7752 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
7754 return true;
7757 static bool trans_HVC(DisasContext *s, arg_HVC *a)
7759 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
7760 return false;
7762 if (IS_USER(s)) {
7763 unallocated_encoding(s);
7764 } else {
7765 gen_hvc(s, a->imm);
7767 return true;
7770 static bool trans_SMC(DisasContext *s, arg_SMC *a)
7772 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
7773 return false;
7775 if (IS_USER(s)) {
7776 unallocated_encoding(s);
7777 } else {
7778 gen_smc(s);
7780 return true;
7783 static bool trans_SG(DisasContext *s, arg_SG *a)
7785 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
7786 !arm_dc_feature(s, ARM_FEATURE_V8)) {
7787 return false;
7789 /*
7790 * SG (v8M only)
7791 * The bulk of the behaviour for this instruction is implemented
7792 * in v7m_handle_execute_nsc(), which deals with the insn when
7793 * it is executed by a CPU in non-secure state from memory
7794 * which is Secure & NonSecure-Callable.
7795 * Here we only need to handle the remaining cases:
7796 * * in NS memory (including the "security extension not
7797 * implemented" case) : NOP
7798 * * in S memory but CPU already secure (clear IT bits)
7799 * We know that the attribute for the memory this insn is
7800 * in must match the current CPU state, because otherwise
7801 * get_phys_addr_pmsav8 would have generated an exception.
7802 */
7803 if (s->v8m_secure) {
7804 /* Like the IT insn, we don't need to generate any code */
7805 s->condexec_cond = 0;
7806 s->condexec_mask = 0;
7808 return true;
7811 static bool trans_TT(DisasContext *s, arg_TT *a)
7813 TCGv_i32 addr, tmp;
7815 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
7816 !arm_dc_feature(s, ARM_FEATURE_V8)) {
7817 return false;
7819 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
7820 /* We UNDEF for these UNPREDICTABLE cases */
7821 unallocated_encoding(s);
7822 return true;
7824 if (a->A && !s->v8m_secure) {
7825 /* This case is UNDEFINED. */
7826 unallocated_encoding(s);
7827 return true;
7830 addr = load_reg(s, a->rn);
7831 tmp = tcg_const_i32((a->A << 1) | a->T);
7832 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
7833 tcg_temp_free_i32(addr);
7834 store_reg(s, a->rd, tmp);
7835 return true;
7839 * Load/store register index
7842 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
7844 ISSInfo ret;
7846 /* ISS not valid if writeback */
7847 if (p && !w) {
7848 ret = rd;
7849 if (s->base.pc_next - s->pc_curr == 2) {
7850 ret |= ISSIs16Bit;
7852 } else {
7853 ret = ISSInvalid;
7855 return ret;
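/*
 * Example: a 16-bit Thumb encoding is detected as pc_next - pc_curr == 2
 * and reported via ISSIs16Bit so the syndrome records the insn length.
 */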
7858 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
7860 TCGv_i32 addr = load_reg(s, a->rn);
7862 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7863 gen_helper_v8m_stackcheck(cpu_env, addr);
7866 if (a->p) {
7867 TCGv_i32 ofs = load_reg(s, a->rm);
7868 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
7869 if (a->u) {
7870 tcg_gen_add_i32(addr, addr, ofs);
7871 } else {
7872 tcg_gen_sub_i32(addr, addr, ofs);
7874 tcg_temp_free_i32(ofs);
7876 return addr;
7879 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
7880 TCGv_i32 addr, int address_offset)
7882 if (!a->p) {
7883 TCGv_i32 ofs = load_reg(s, a->rm);
7884 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
7885 if (a->u) {
7886 tcg_gen_add_i32(addr, addr, ofs);
7887 } else {
7888 tcg_gen_sub_i32(addr, addr, ofs);
7890 tcg_temp_free_i32(ofs);
7891 } else if (!a->w) {
7892 tcg_temp_free_i32(addr);
7893 return;
7895 tcg_gen_addi_i32(addr, addr, address_offset);
7896 store_reg(s, a->rn, addr);
7899 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
7900 MemOp mop, int mem_idx)
7902 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
7903 TCGv_i32 addr, tmp;
7905 addr = op_addr_rr_pre(s, a);
7907 tmp = tcg_temp_new_i32();
7908 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7909 disas_set_da_iss(s, mop, issinfo);
7911 /*
7912 * Perform base writeback before the loaded value to
7913 * ensure correct behavior with overlapping index registers.
7914 */
7915 op_addr_rr_post(s, a, addr, 0);
7916 store_reg_from_load(s, a->rt, tmp);
7917 return true;
7920 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
7921 MemOp mop, int mem_idx)
7923 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
7924 TCGv_i32 addr, tmp;
7926 addr = op_addr_rr_pre(s, a);
7928 tmp = load_reg(s, a->rt);
7929 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7930 disas_set_da_iss(s, mop, issinfo);
7931 tcg_temp_free_i32(tmp);
7933 op_addr_rr_post(s, a, addr, 0);
7934 return true;
7937 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
7939 int mem_idx = get_mem_index(s);
7940 TCGv_i32 addr, tmp;
7942 if (!ENABLE_ARCH_5TE) {
7943 return false;
7945 if (a->rt & 1) {
7946 unallocated_encoding(s);
7947 return true;
7949 addr = op_addr_rr_pre(s, a);
7951 tmp = tcg_temp_new_i32();
7952 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7953 store_reg(s, a->rt, tmp);
7955 tcg_gen_addi_i32(addr, addr, 4);
7957 tmp = tcg_temp_new_i32();
7958 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7959 store_reg(s, a->rt + 1, tmp);
7961 /* LDRD w/ base writeback is undefined if the registers overlap. */
7962 op_addr_rr_post(s, a, addr, -4);
7963 return true;
7966 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
7968 int mem_idx = get_mem_index(s);
7969 TCGv_i32 addr, tmp;
7971 if (!ENABLE_ARCH_5TE) {
7972 return false;
7974 if (a->rt & 1) {
7975 unallocated_encoding(s);
7976 return true;
7978 addr = op_addr_rr_pre(s, a);
7980 tmp = load_reg(s, a->rt);
7981 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7982 tcg_temp_free_i32(tmp);
7984 tcg_gen_addi_i32(addr, addr, 4);
7986 tmp = load_reg(s, a->rt + 1);
7987 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7988 tcg_temp_free_i32(tmp);
7990 op_addr_rr_post(s, a, addr, -4);
7991 return true;
7995 * Load/store immediate index
7998 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8000 int ofs = a->imm;
8002 if (!a->u) {
8003 ofs = -ofs;
8006 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8007 /*
8008 * Stackcheck. Here we know 'addr' is the current SP;
8009 * U is set if we're moving SP up, else down. It is
8010 * UNKNOWN whether the limit check triggers when SP starts
8011 * below the limit and ends up above it; we chose to do so.
8012 */
8013 if (!a->u) {
8014 TCGv_i32 newsp = tcg_temp_new_i32();
8015 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8016 gen_helper_v8m_stackcheck(cpu_env, newsp);
8017 tcg_temp_free_i32(newsp);
8018 } else {
8019 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8023 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
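/*
 * Pre-indexed forms fold the offset into the address returned here;
 * post-indexed forms (a->p == 0) use the base unmodified and apply the
 * offset afterwards in op_addr_ri_post().
 */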
8026 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8027 TCGv_i32 addr, int address_offset)
8029 if (!a->p) {
8030 if (a->u) {
8031 address_offset += a->imm;
8032 } else {
8033 address_offset -= a->imm;
8035 } else if (!a->w) {
8036 tcg_temp_free_i32(addr);
8037 return;
8039 tcg_gen_addi_i32(addr, addr, address_offset);
8040 store_reg(s, a->rn, addr);
8043 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8044 MemOp mop, int mem_idx)
8046 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8047 TCGv_i32 addr, tmp;
8049 addr = op_addr_ri_pre(s, a);
8051 tmp = tcg_temp_new_i32();
8052 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8053 disas_set_da_iss(s, mop, issinfo);
8055 /*
8056 * Perform base writeback before the loaded value to
8057 * ensure correct behavior with overlapping index registers.
8058 */
8059 op_addr_ri_post(s, a, addr, 0);
8060 store_reg_from_load(s, a->rt, tmp);
8061 return true;
8064 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8065 MemOp mop, int mem_idx)
8067 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8068 TCGv_i32 addr, tmp;
8070 addr = op_addr_ri_pre(s, a);
8072 tmp = load_reg(s, a->rt);
8073 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8074 disas_set_da_iss(s, mop, issinfo);
8075 tcg_temp_free_i32(tmp);
8077 op_addr_ri_post(s, a, addr, 0);
8078 return true;
8081 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8083 int mem_idx = get_mem_index(s);
8084 TCGv_i32 addr, tmp;
8086 addr = op_addr_ri_pre(s, a);
8088 tmp = tcg_temp_new_i32();
8089 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8090 store_reg(s, a->rt, tmp);
8092 tcg_gen_addi_i32(addr, addr, 4);
8094 tmp = tcg_temp_new_i32();
8095 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8096 store_reg(s, rt2, tmp);
8098 /* LDRD w/ base writeback is undefined if the registers overlap. */
8099 op_addr_ri_post(s, a, addr, -4);
8100 return true;
8103 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8105 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8106 return false;
8108 return op_ldrd_ri(s, a, a->rt + 1);
8111 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8113 arg_ldst_ri b = {
8114 .u = a->u, .w = a->w, .p = a->p,
8115 .rn = a->rn, .rt = a->rt, .imm = a->imm
8117 return op_ldrd_ri(s, &b, a->rt2);
8120 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8122 int mem_idx = get_mem_index(s);
8123 TCGv_i32 addr, tmp;
8125 addr = op_addr_ri_pre(s, a);
8127 tmp = load_reg(s, a->rt);
8128 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8129 tcg_temp_free_i32(tmp);
8131 tcg_gen_addi_i32(addr, addr, 4);
8133 tmp = load_reg(s, rt2);
8134 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8135 tcg_temp_free_i32(tmp);
8137 op_addr_ri_post(s, a, addr, -4);
8138 return true;
8141 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8143 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8144 return false;
8146 return op_strd_ri(s, a, a->rt + 1);
8149 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8151 arg_ldst_ri b = {
8152 .u = a->u, .w = a->w, .p = a->p,
8153 .rn = a->rn, .rt = a->rt, .imm = a->imm
8155 return op_strd_ri(s, &b, a->rt2);
8158 #define DO_LDST(NAME, WHICH, MEMOP) \
8159 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8161 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
8163 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
8165 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
8167 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
8169 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
8171 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
8173 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
8176 DO_LDST(LDR, load, MO_UL)
8177 DO_LDST(LDRB, load, MO_UB)
8178 DO_LDST(LDRH, load, MO_UW)
8179 DO_LDST(LDRSB, load, MO_SB)
8180 DO_LDST(LDRSH, load, MO_SW)
8182 DO_LDST(STR, store, MO_UL)
8183 DO_LDST(STRB, store, MO_UB)
8184 DO_LDST(STRH, store, MO_UW)
8186 #undef DO_LDST
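/*
 * For example, DO_LDST(LDR, load, MO_UL) generated trans_LDR_ri,
 * trans_LDRT_ri, trans_LDR_rr and trans_LDRT_rr above, with the ...T
 * (unprivileged) variants using get_a32_user_mem_index().
 */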
8189 * Synchronization primitives
8192 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8194 TCGv_i32 addr, tmp;
8195 TCGv taddr;
8197 opc |= s->be_data;
8198 addr = load_reg(s, a->rn);
8199 taddr = gen_aa32_addr(s, addr, opc);
8200 tcg_temp_free_i32(addr);
8202 tmp = load_reg(s, a->rt2);
8203 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8204 tcg_temp_free(taddr);
8206 store_reg(s, a->rt, tmp);
8207 return true;
8210 static bool trans_SWP(DisasContext *s, arg_SWP *a)
8212 return op_swp(s, a, MO_UL | MO_ALIGN);
8215 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8217 return op_swp(s, a, MO_UB);
8221 * Load/Store Exclusive and Load-Acquire/Store-Release
8224 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8226 TCGv_i32 addr;
8227 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8228 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8230 /* We UNDEF for these UNPREDICTABLE cases. */
8231 if (a->rd == 15 || a->rn == 15 || a->rt == 15
8232 || a->rd == a->rn || a->rd == a->rt
8233 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
8234 || (mop == MO_64
8235 && (a->rt2 == 15
8236 || a->rd == a->rt2
8237 || (!v8a && s->thumb && a->rt2 == 13)))) {
8238 unallocated_encoding(s);
8239 return true;
8242 if (rel) {
8243 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8246 addr = tcg_temp_local_new_i32();
8247 load_reg_var(s, addr, a->rn);
8248 tcg_gen_addi_i32(addr, addr, a->imm);
8250 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8251 tcg_temp_free_i32(addr);
8252 return true;
8255 static bool trans_STREX(DisasContext *s, arg_STREX *a)
8257 if (!ENABLE_ARCH_6) {
8258 return false;
8260 return op_strex(s, a, MO_32, false);
8263 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
8265 if (!ENABLE_ARCH_6K) {
8266 return false;
8268 /* We UNDEF for these UNPREDICTABLE cases. */
8269 if (a->rt & 1) {
8270 unallocated_encoding(s);
8271 return true;
8273 a->rt2 = a->rt + 1;
8274 return op_strex(s, a, MO_64, false);
8277 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
8279 return op_strex(s, a, MO_64, false);
8282 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
8284 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8285 return false;
8287 return op_strex(s, a, MO_8, false);
8290 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
8292 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8293 return false;
8295 return op_strex(s, a, MO_16, false);
8298 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
8300 if (!ENABLE_ARCH_8) {
8301 return false;
8303 return op_strex(s, a, MO_32, true);
8306 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
8308 if (!ENABLE_ARCH_8) {
8309 return false;
8311 /* We UNDEF for these UNPREDICTABLE cases. */
8312 if (a->rt & 1) {
8313 unallocated_encoding(s);
8314 return true;
8316 a->rt2 = a->rt + 1;
8317 return op_strex(s, a, MO_64, true);
8320 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
8322 if (!ENABLE_ARCH_8) {
8323 return false;
8325 return op_strex(s, a, MO_64, true);
8328 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
8330 if (!ENABLE_ARCH_8) {
8331 return false;
8333 return op_strex(s, a, MO_8, true);
8336 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
8338 if (!ENABLE_ARCH_8) {
8339 return false;
8341 return op_strex(s, a, MO_16, true);
8344 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
8346 TCGv_i32 addr, tmp;
8348 if (!ENABLE_ARCH_8) {
8349 return false;
8351 /* We UNDEF for these UNPREDICTABLE cases. */
8352 if (a->rn == 15 || a->rt == 15) {
8353 unallocated_encoding(s);
8354 return true;
8357 addr = load_reg(s, a->rn);
8358 tmp = load_reg(s, a->rt);
8359 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8360 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8361 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
8363 tcg_temp_free_i32(tmp);
8364 tcg_temp_free_i32(addr);
8365 return true;
8368 static bool trans_STL(DisasContext *s, arg_STL *a)
8370 return op_stl(s, a, MO_UL);
8373 static bool trans_STLB(DisasContext *s, arg_STL *a)
8375 return op_stl(s, a, MO_UB);
8378 static bool trans_STLH(DisasContext *s, arg_STL *a)
8380 return op_stl(s, a, MO_UW);
8383 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
8385 TCGv_i32 addr;
8386 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8387 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8389 /* We UNDEF for these UNPREDICTABLE cases. */
8390 if (a->rn == 15 || a->rt == 15
8391 || (!v8a && s->thumb && a->rt == 13)
8392 || (mop == MO_64
8393 && (a->rt2 == 15 || a->rt == a->rt2
8394 || (!v8a && s->thumb && a->rt2 == 13)))) {
8395 unallocated_encoding(s);
8396 return true;
8399 addr = tcg_temp_local_new_i32();
8400 load_reg_var(s, addr, a->rn);
8401 tcg_gen_addi_i32(addr, addr, a->imm);
8403 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
8404 tcg_temp_free_i32(addr);
8406 if (acq) {
8407 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
8409 return true;
8412 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
8414 if (!ENABLE_ARCH_6) {
8415 return false;
8417 return op_ldrex(s, a, MO_32, false);
8420 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
8422 if (!ENABLE_ARCH_6K) {
8423 return false;
8425 /* We UNDEF for these UNPREDICTABLE cases. */
8426 if (a->rt & 1) {
8427 unallocated_encoding(s);
8428 return true;
8430 a->rt2 = a->rt + 1;
8431 return op_ldrex(s, a, MO_64, false);
8434 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
8436 return op_ldrex(s, a, MO_64, false);
8439 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
8441 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8442 return false;
8444 return op_ldrex(s, a, MO_8, false);
8447 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
8449 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8450 return false;
8452 return op_ldrex(s, a, MO_16, false);
8455 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
8457 if (!ENABLE_ARCH_8) {
8458 return false;
8460 return op_ldrex(s, a, MO_32, true);
8463 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
8465 if (!ENABLE_ARCH_8) {
8466 return false;
8468 /* We UNDEF for these UNPREDICTABLE cases. */
8469 if (a->rt & 1) {
8470 unallocated_encoding(s);
8471 return true;
8473 a->rt2 = a->rt + 1;
8474 return op_ldrex(s, a, MO_64, true);
8477 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
8479 if (!ENABLE_ARCH_8) {
8480 return false;
8482 return op_ldrex(s, a, MO_64, true);
8485 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
8487 if (!ENABLE_ARCH_8) {
8488 return false;
8490 return op_ldrex(s, a, MO_8, true);
8493 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
8495 if (!ENABLE_ARCH_8) {
8496 return false;
8498 return op_ldrex(s, a, MO_16, true);
8501 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
8503 TCGv_i32 addr, tmp;
8505 if (!ENABLE_ARCH_8) {
8506 return false;
8508 /* We UNDEF for these UNPREDICTABLE cases. */
8509 if (a->rn == 15 || a->rt == 15) {
8510 unallocated_encoding(s);
8511 return true;
8514 addr = load_reg(s, a->rn);
8515 tmp = tcg_temp_new_i32();
8516 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8517 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
8518 tcg_temp_free_i32(addr);
8520 store_reg(s, a->rt, tmp);
8521 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8522 return true;
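/*
 * LDA, LDAB and LDAH above are load-acquire but not exclusive: a plain
 * load followed by a memory barrier, with no effect on the exclusive
 * monitor (contrast op_ldrex above).
 */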
8525 static bool trans_LDA(DisasContext *s, arg_LDA *a)
8527 return op_lda(s, a, MO_UL);
8530 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
8532 return op_lda(s, a, MO_UB);
8535 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
8537 return op_lda(s, a, MO_UW);
8541 * Media instructions
8544 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
8546 TCGv_i32 t1, t2;
8548 if (!ENABLE_ARCH_6) {
8549 return false;
8552 t1 = load_reg(s, a->rn);
8553 t2 = load_reg(s, a->rm);
8554 gen_helper_usad8(t1, t1, t2);
8555 tcg_temp_free_i32(t2);
8556 if (a->ra != 15) {
8557 t2 = load_reg(s, a->ra);
8558 tcg_gen_add_i32(t1, t1, t2);
8559 tcg_temp_free_i32(t2);
8561 store_reg(s, a->rd, t1);
8562 return true;
8565 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
8567 TCGv_i32 tmp;
8568 int width = a->widthm1 + 1;
8569 int shift = a->lsb;
8571 if (!ENABLE_ARCH_6T2) {
8572 return false;
8574 if (shift + width > 32) {
8575 /* UNPREDICTABLE; we choose to UNDEF */
8576 unallocated_encoding(s);
8577 return true;
8580 tmp = load_reg(s, a->rn);
8581 if (u) {
8582 tcg_gen_extract_i32(tmp, tmp, shift, width);
8583 } else {
8584 tcg_gen_sextract_i32(tmp, tmp, shift, width);
8586 store_reg(s, a->rd, tmp);
8587 return true;
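/*
 * Example: UBFX r0, r1, #8, #4 has lsb == 8 and widthm1 == 3, so op_bfx
 * reduces to tcg_gen_extract_i32(tmp, tmp, 8, 4), i.e.
 * r0 = (r1 >> 8) & 0xf.  SBFX is the same with the sign-extending
 * sextract variant.
 */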
8590 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
8592 return op_bfx(s, a, false);
8595 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
8597 return op_bfx(s, a, true);
8600 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
8602 TCGv_i32 tmp;
8603 int msb = a->msb, lsb = a->lsb;
8604 int width;
8606 if (!ENABLE_ARCH_6T2) {
8607 return false;
8609 if (msb < lsb) {
8610 /* UNPREDICTABLE; we choose to UNDEF */
8611 unallocated_encoding(s);
8612 return true;
8615 width = msb + 1 - lsb;
8616 if (a->rn == 15) {
8617 /* BFC */
8618 tmp = tcg_const_i32(0);
8619 } else {
8620 /* BFI */
8621 tmp = load_reg(s, a->rn);
8623 if (width != 32) {
8624 TCGv_i32 tmp2 = load_reg(s, a->rd);
8625 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
8626 tcg_temp_free_i32(tmp2);
8628 store_reg(s, a->rd, tmp);
8629 return true;
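/*
 * Example: BFI r0, r1, #4, #8 has lsb == 4 and msb == 11 (width 8), so
 * the deposit above computes r0 = (r0 & ~0xff0) | ((r1 & 0xff) << 4).
 * BFC is the same operation with a zero source, which simply clears the
 * field.
 */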
8632 static bool trans_UDF(DisasContext *s, arg_UDF *a)
8634 unallocated_encoding(s);
8635 return true;
8639 * Parallel addition and subtraction
8642 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
8643 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
8645 TCGv_i32 t0, t1;
8647 if (s->thumb
8648 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8649 : !ENABLE_ARCH_6) {
8650 return false;
8653 t0 = load_reg(s, a->rn);
8654 t1 = load_reg(s, a->rm);
8656 gen(t0, t0, t1);
8658 tcg_temp_free_i32(t1);
8659 store_reg(s, a->rd, t0);
8660 return true;
8663 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
8664 void (*gen)(TCGv_i32, TCGv_i32,
8665 TCGv_i32, TCGv_ptr))
8667 TCGv_i32 t0, t1;
8668 TCGv_ptr ge;
8670 if (s->thumb
8671 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8672 : !ENABLE_ARCH_6) {
8673 return false;
8676 t0 = load_reg(s, a->rn);
8677 t1 = load_reg(s, a->rm);
8679 ge = tcg_temp_new_ptr();
8680 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
8681 gen(t0, t0, t1, ge);
8683 tcg_temp_free_ptr(ge);
8684 tcg_temp_free_i32(t1);
8685 store_reg(s, a->rd, t0);
8686 return true;
8689 #define DO_PAR_ADDSUB(NAME, helper) \
8690 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8692 return op_par_addsub(s, a, helper); \
8695 #define DO_PAR_ADDSUB_GE(NAME, helper) \
8696 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8698 return op_par_addsub_ge(s, a, helper); \
8701 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
8702 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
8703 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
8704 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
8705 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
8706 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
8708 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
8709 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
8710 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
8711 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
8712 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
8713 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
8715 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
8716 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
8717 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
8718 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
8719 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
8720 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
8722 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
8723 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
8724 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
8725 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
8726 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
8727 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
8729 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
8730 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
8731 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
8732 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
8733 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
8734 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
8736 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
8737 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
8738 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
8739 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
8740 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
8741 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
8743 #undef DO_PAR_ADDSUB
8744 #undef DO_PAR_ADDSUB_GE
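/*
 * Naming of the helpers above: the S or U prefix selects signed or
 * unsigned arithmetic, the Q and UQ forms saturate, the SH and UH forms
 * halve each result, and only the plain signed/unsigned forms update the
 * GE flags (hence DO_PAR_ADDSUB_GE for those).
 */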
8747 * Packing, unpacking, saturation, and reversal
8750 static bool trans_PKH(DisasContext *s, arg_PKH *a)
8752 TCGv_i32 tn, tm;
8753 int shift = a->imm;
8755 if (s->thumb
8756 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8757 : !ENABLE_ARCH_6) {
8758 return false;
8761 tn = load_reg(s, a->rn);
8762 tm = load_reg(s, a->rm);
8763 if (a->tb) {
8764 /* PKHTB */
8765 if (shift == 0) {
8766 shift = 31;
8768 tcg_gen_sari_i32(tm, tm, shift);
8769 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
8770 } else {
8771 /* PKHBT */
8772 tcg_gen_shli_i32(tm, tm, shift);
8773 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
8775 tcg_temp_free_i32(tm);
8776 store_reg(s, a->rd, tn);
8777 return true;
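/*
 * Example: PKHBT r0, r1, r2, LSL #8 packs the bottom halfword of r1 with
 * the shifted top halfword of r2:
 *     r0 = (r1 & 0xffff) | ((r2 << 8) & 0xffff0000)
 * PKHTB takes the top halfword of Rn and the bottom halfword of the
 * arithmetically shifted Rm; a shift amount of 0 there means ASR #32,
 * which the code above implements as a shift by 31 (same result).
 */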
8780 static bool op_sat(DisasContext *s, arg_sat *a,
8781 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
8783 TCGv_i32 tmp, satimm;
8784 int shift = a->imm;
8786 if (!ENABLE_ARCH_6) {
8787 return false;
8790 tmp = load_reg(s, a->rn);
8791 if (a->sh) {
8792 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
8793 } else {
8794 tcg_gen_shli_i32(tmp, tmp, shift);
8797 satimm = tcg_const_i32(a->satimm);
8798 gen(tmp, cpu_env, tmp, satimm);
8799 tcg_temp_free_i32(satimm);
8801 store_reg(s, a->rd, tmp);
8802 return true;
8805 static bool trans_SSAT(DisasContext *s, arg_sat *a)
8807 return op_sat(s, a, gen_helper_ssat);
8810 static bool trans_USAT(DisasContext *s, arg_sat *a)
8812 return op_sat(s, a, gen_helper_usat);
8815 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
8817 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8818 return false;
8820 return op_sat(s, a, gen_helper_ssat16);
8823 static bool trans_USAT16(DisasContext *s, arg_sat *a)
8825 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8826 return false;
8828 return op_sat(s, a, gen_helper_usat16);
8831 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
8832 void (*gen_extract)(TCGv_i32, TCGv_i32),
8833 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
8835 TCGv_i32 tmp;
8837 if (!ENABLE_ARCH_6) {
8838 return false;
8841 tmp = load_reg(s, a->rm);
8843 * TODO: In many cases we could do a shift instead of a rotate.
8844 * Combined with a simple extend, that becomes an extract.
8846 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
8847 gen_extract(tmp, tmp);
8849 if (a->rn != 15) {
8850 TCGv_i32 tmp2 = load_reg(s, a->rn);
8851 gen_add(tmp, tmp, tmp2);
8852 tcg_temp_free_i32(tmp2);
8854 store_reg(s, a->rd, tmp);
8855 return true;
8858 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
8860 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
8863 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
8865 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
8868 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
8870 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8871 return false;
8873 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
8876 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
8878 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
8881 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
8883 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
8886 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
8888 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8889 return false;
8891 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
8894 static bool trans_SEL(DisasContext *s, arg_rrr *a)
8896 TCGv_i32 t1, t2, t3;
8898 if (s->thumb
8899 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8900 : !ENABLE_ARCH_6) {
8901 return false;
8904 t1 = load_reg(s, a->rn);
8905 t2 = load_reg(s, a->rm);
8906 t3 = tcg_temp_new_i32();
8907 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
8908 gen_helper_sel_flags(t1, t3, t1, t2);
8909 tcg_temp_free_i32(t3);
8910 tcg_temp_free_i32(t2);
8911 store_reg(s, a->rd, t1);
8912 return true;
8915 static bool op_rr(DisasContext *s, arg_rr *a,
8916 void (*gen)(TCGv_i32, TCGv_i32))
8918 TCGv_i32 tmp;
8920 tmp = load_reg(s, a->rm);
8921 gen(tmp, tmp);
8922 store_reg(s, a->rd, tmp);
8923 return true;
8926 static bool trans_REV(DisasContext *s, arg_rr *a)
8928 if (!ENABLE_ARCH_6) {
8929 return false;
8931 return op_rr(s, a, tcg_gen_bswap32_i32);
8934 static bool trans_REV16(DisasContext *s, arg_rr *a)
8936 if (!ENABLE_ARCH_6) {
8937 return false;
8939 return op_rr(s, a, gen_rev16);
8942 static bool trans_REVSH(DisasContext *s, arg_rr *a)
8944 if (!ENABLE_ARCH_6) {
8945 return false;
8947 return op_rr(s, a, gen_revsh);
8950 static bool trans_RBIT(DisasContext *s, arg_rr *a)
8952 if (!ENABLE_ARCH_6T2) {
8953 return false;
8955 return op_rr(s, a, gen_helper_rbit);
8959 * Signed multiply, signed and unsigned divide
8962 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
8964 TCGv_i32 t1, t2;
8966 if (!ENABLE_ARCH_6) {
8967 return false;
8970 t1 = load_reg(s, a->rn);
8971 t2 = load_reg(s, a->rm);
8972 if (m_swap) {
8973 gen_swap_half(t2);
8975 gen_smul_dual(t1, t2);
8977 if (sub) {
8978 /* This subtraction cannot overflow. */
8979 tcg_gen_sub_i32(t1, t1, t2);
8980 } else {
8982 * This addition cannot overflow 32 bits; however it may
8983 * overflow when considered as a signed operation, in which case
8984 * we must set the Q flag.
8986 gen_helper_add_setq(t1, cpu_env, t1, t2);
8988 tcg_temp_free_i32(t2);
8990 if (a->ra != 15) {
8991 t2 = load_reg(s, a->ra);
8992 gen_helper_add_setq(t1, cpu_env, t1, t2);
8993 tcg_temp_free_i32(t2);
8995 store_reg(s, a->rd, t1);
8996 return true;
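/*
 * SMLAD and friends above compute two 16x16->32 products (gen_smul_dual)
 * and then add or subtract them; the X variants swap the halfwords of Rm
 * first.  The difference of the two products cannot overflow, but their
 * sum and the optional accumulate from Ra can overflow as signed values,
 * so those steps go through gen_helper_add_setq to latch the Q flag.
 */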
8999 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9001 return op_smlad(s, a, false, false);
9004 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9006 return op_smlad(s, a, true, false);
9009 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9011 return op_smlad(s, a, false, true);
9014 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9016 return op_smlad(s, a, true, true);
9019 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9021 TCGv_i32 t1, t2;
9022 TCGv_i64 l1, l2;
9024 if (!ENABLE_ARCH_6) {
9025 return false;
9028 t1 = load_reg(s, a->rn);
9029 t2 = load_reg(s, a->rm);
9030 if (m_swap) {
9031 gen_swap_half(t2);
9033 gen_smul_dual(t1, t2);
9035 l1 = tcg_temp_new_i64();
9036 l2 = tcg_temp_new_i64();
9037 tcg_gen_ext_i32_i64(l1, t1);
9038 tcg_gen_ext_i32_i64(l2, t2);
9039 tcg_temp_free_i32(t1);
9040 tcg_temp_free_i32(t2);
9042 if (sub) {
9043 tcg_gen_sub_i64(l1, l1, l2);
9044 } else {
9045 tcg_gen_add_i64(l1, l1, l2);
9047 tcg_temp_free_i64(l2);
9049 gen_addq(s, l1, a->ra, a->rd);
9050 gen_storeq_reg(s, a->ra, a->rd, l1);
9051 tcg_temp_free_i64(l1);
9052 return true;
9055 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9057 return op_smlald(s, a, false, false);
9060 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9062 return op_smlald(s, a, true, false);
9065 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9067 return op_smlald(s, a, false, true);
9070 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9072 return op_smlald(s, a, true, true);
9075 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9077 TCGv_i32 t1, t2;
9079 if (s->thumb
9080 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9081 : !ENABLE_ARCH_6) {
9082 return false;
9085 t1 = load_reg(s, a->rn);
9086 t2 = load_reg(s, a->rm);
9087 tcg_gen_muls2_i32(t2, t1, t1, t2);
9089 if (a->ra != 15) {
9090 TCGv_i32 t3 = load_reg(s, a->ra);
9091 if (sub) {
9093 * For SMMLS, we need a 64-bit subtract: it propagates the
9094 * borrow caused by a non-zero multiplicand lowpart and yields
9095 * the correct result lowpart for rounding.
9097 TCGv_i32 zero = tcg_const_i32(0);
9098 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9099 tcg_temp_free_i32(zero);
9100 } else {
9101 tcg_gen_add_i32(t1, t1, t3);
9103 tcg_temp_free_i32(t3);
9105 if (round) {
9107 * Adding 0x80000000 to the 64-bit quantity means that we have
9108 * carry in to the high word when the low word has the msb set.
9110 tcg_gen_shri_i32(t2, t2, 31);
9111 tcg_gen_add_i32(t1, t1, t2);
9113 tcg_temp_free_i32(t2);
9114 store_reg(s, a->rd, t1);
9115 return true;
9118 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9120 return op_smmla(s, a, false, false);
9123 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9125 return op_smmla(s, a, true, false);
9128 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9130 return op_smmla(s, a, false, true);
9133 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9135 return op_smmla(s, a, true, true);
9138 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9140 TCGv_i32 t1, t2;
9142 if (s->thumb
9143 ? !dc_isar_feature(aa32_thumb_div, s)
9144 : !dc_isar_feature(aa32_arm_div, s)) {
9145 return false;
9148 t1 = load_reg(s, a->rn);
9149 t2 = load_reg(s, a->rm);
9150 if (u) {
9151 gen_helper_udiv(t1, t1, t2);
9152 } else {
9153 gen_helper_sdiv(t1, t1, t2);
9155 tcg_temp_free_i32(t2);
9156 store_reg(s, a->rd, t1);
9157 return true;
9160 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9162 return op_div(s, a, false);
9165 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9167 return op_div(s, a, true);
9171 * Block data transfer
9174 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9176 TCGv_i32 addr = load_reg(s, a->rn);
9178 if (a->b) {
9179 if (a->i) {
9180 /* pre increment */
9181 tcg_gen_addi_i32(addr, addr, 4);
9182 } else {
9183 /* pre decrement */
9184 tcg_gen_addi_i32(addr, addr, -(n * 4));
9186 } else if (!a->i && n != 1) {
9187 /* post decrement */
9188 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9191 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9193 * If the writeback is incrementing SP rather than
9194 * decrementing it, and the initial SP is below the
9195 * stack limit but the final written-back SP would
9196 * be above, then we must not perform any memory
9197 * accesses, but it is IMPDEF whether we generate
9198 * an exception. We choose to do so in this case.
9199 * At this point 'addr' is the lowest address, so
9200 * either the original SP (if incrementing) or our
9201 * final SP (if decrementing), so that's what we check.
9203 gen_helper_v8m_stackcheck(cpu_env, addr);
9206 return addr;
9209 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9210 TCGv_i32 addr, int n)
9212 if (a->w) {
9213 /* write back */
9214 if (!a->b) {
9215 if (a->i) {
9216 /* post increment */
9217 tcg_gen_addi_i32(addr, addr, 4);
9218 } else {
9219 /* post decrement */
9220 tcg_gen_addi_i32(addr, addr, -(n * 4));
9222 } else if (!a->i && n != 1) {
9223 /* pre decrement */
9224 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9226 store_reg(s, a->rn, addr);
9227 } else {
9228 tcg_temp_free_i32(addr);
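/*
 * Between them, op_addr_block_pre() and op_addr_block_post() implement
 * the four block addressing modes; with n registers transferred:
 *   IA (i=1, b=0): first address Rn,           writeback Rn + 4*n
 *   IB (i=1, b=1): first address Rn + 4,       writeback Rn + 4*n
 *   DA (i=0, b=0): first address Rn - 4*(n-1), writeback Rn - 4*n
 *   DB (i=0, b=1): first address Rn - 4*n,     writeback Rn - 4*n
 * The transfer loops in op_stm() and do_ldm() below always walk upwards
 * in steps of 4 from the first address.
 */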
9232 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9234 int i, j, n, list, mem_idx;
9235 bool user = a->u;
9236 TCGv_i32 addr, tmp, tmp2;
9238 if (user) {
9239 /* STM (user) */
9240 if (IS_USER(s)) {
9241 /* Only usable in supervisor mode. */
9242 unallocated_encoding(s);
9243 return true;
9247 list = a->list;
9248 n = ctpop16(list);
9249 if (n < min_n || a->rn == 15) {
9250 unallocated_encoding(s);
9251 return true;
9254 addr = op_addr_block_pre(s, a, n);
9255 mem_idx = get_mem_index(s);
9257 for (i = j = 0; i < 16; i++) {
9258 if (!(list & (1 << i))) {
9259 continue;
9262 if (user && i != 15) {
9263 tmp = tcg_temp_new_i32();
9264 tmp2 = tcg_const_i32(i);
9265 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9266 tcg_temp_free_i32(tmp2);
9267 } else {
9268 tmp = load_reg(s, i);
9270 gen_aa32_st32(s, tmp, addr, mem_idx);
9271 tcg_temp_free_i32(tmp);
9273 /* No need to add after the last transfer. */
9274 if (++j != n) {
9275 tcg_gen_addi_i32(addr, addr, 4);
9279 op_addr_block_post(s, a, addr, n);
9280 return true;
9283 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
9285 /* BitCount(list) < 1 is UNPREDICTABLE */
9286 return op_stm(s, a, 1);
9289 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
9291 /* Writeback register in register list is UNPREDICTABLE for T32. */
9292 if (a->w && (a->list & (1 << a->rn))) {
9293 unallocated_encoding(s);
9294 return true;
9296 /* BitCount(list) < 2 is UNPREDICTABLE */
9297 return op_stm(s, a, 2);
9300 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
9302 int i, j, n, list, mem_idx;
9303 bool loaded_base;
9304 bool user = a->u;
9305 bool exc_return = false;
9306 TCGv_i32 addr, tmp, tmp2, loaded_var;
9308 if (user) {
9309 /* LDM (user), LDM (exception return) */
9310 if (IS_USER(s)) {
9311 /* Only usable in supervisor mode. */
9312 unallocated_encoding(s);
9313 return true;
9315 if (extract32(a->list, 15, 1)) {
9316 exc_return = true;
9317 user = false;
9318 } else {
9319 /* LDM (user) does not allow writeback. */
9320 if (a->w) {
9321 unallocated_encoding(s);
9322 return true;
9327 list = a->list;
9328 n = ctpop16(list);
9329 if (n < min_n || a->rn == 15) {
9330 unallocated_encoding(s);
9331 return true;
9334 addr = op_addr_block_pre(s, a, n);
9335 mem_idx = get_mem_index(s);
9336 loaded_base = false;
9337 loaded_var = NULL;
9339 for (i = j = 0; i < 16; i++) {
9340 if (!(list & (1 << i))) {
9341 continue;
9344 tmp = tcg_temp_new_i32();
9345 gen_aa32_ld32u(s, tmp, addr, mem_idx);
9346 if (user) {
9347 tmp2 = tcg_const_i32(i);
9348 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
9349 tcg_temp_free_i32(tmp2);
9350 tcg_temp_free_i32(tmp);
9351 } else if (i == a->rn) {
9352 loaded_var = tmp;
9353 loaded_base = true;
9354 } else if (i == 15 && exc_return) {
9355 store_pc_exc_ret(s, tmp);
9356 } else {
9357 store_reg_from_load(s, i, tmp);
9360 /* No need to add after the last transfer. */
9361 if (++j != n) {
9362 tcg_gen_addi_i32(addr, addr, 4);
9366 op_addr_block_post(s, a, addr, n);
9368 if (loaded_base) {
9369 /* Note that we reject base == pc above. */
9370 store_reg(s, a->rn, loaded_var);
9373 if (exc_return) {
9374 /* Restore CPSR from SPSR. */
9375 tmp = load_cpu_field(spsr);
9376 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9377 gen_io_start();
9379 gen_helper_cpsr_write_eret(cpu_env, tmp);
9380 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9381 gen_io_end();
9383 tcg_temp_free_i32(tmp);
9384 /* Must exit loop to check un-masked IRQs */
9385 s->base.is_jmp = DISAS_EXIT;
9387 return true;
9390 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
9393 * Writeback register in register list is UNPREDICTABLE
9394 * for ArchVersion() >= 7. Prior to v7, A32 would write
9395 * an UNKNOWN value to the base register.
9397 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
9398 unallocated_encoding(s);
9399 return true;
9401 /* BitCount(list) < 1 is UNPREDICTABLE */
9402 return do_ldm(s, a, 1);
9405 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
9407 /* Writeback register in register list is UNPREDICTABLE for T32. */
9408 if (a->w && (a->list & (1 << a->rn))) {
9409 unallocated_encoding(s);
9410 return true;
9412 /* BitCount(list) < 2 is UNPREDICTABLE */
9413 return do_ldm(s, a, 2);
9416 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
9418 /* Writeback is conditional on the base register not being loaded. */
9419 a->w = !(a->list & (1 << a->rn));
9420 /* BitCount(list) < 1 is UNPREDICTABLE */
9421 return do_ldm(s, a, 1);
9425 * Branch, branch with link
9428 static bool trans_B(DisasContext *s, arg_i *a)
9430 gen_jmp(s, read_pc(s) + a->imm);
9431 return true;
9434 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
9436 /* This has cond from encoding, required to be outside IT block. */
9437 if (a->cond >= 0xe) {
9438 return false;
9440 if (s->condexec_mask) {
9441 unallocated_encoding(s);
9442 return true;
9444 arm_skip_unless(s, a->cond);
9445 gen_jmp(s, read_pc(s) + a->imm);
9446 return true;
9449 static bool trans_BL(DisasContext *s, arg_i *a)
9451 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
9452 gen_jmp(s, read_pc(s) + a->imm);
9453 return true;
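/*
 * Note that for BL (and BLX below) the link register is written as
 * s->base.pc_next with the current Thumb state in bit 0, so a later
 * BX lr resumes in the correct instruction set.
 */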
9456 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
9458 TCGv_i32 tmp;
9460 /* For A32, ARCH(5) is checked near the start of the uncond block. */
9461 if (s->thumb && (a->imm & 2)) {
9462 return false;
9464 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
9465 tmp = tcg_const_i32(!s->thumb);
9466 store_cpu_field(tmp, thumb);
9467 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
9468 return true;
9471 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
9473 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9474 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
9475 return true;
9478 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
9480 TCGv_i32 tmp = tcg_temp_new_i32();
9482 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9483 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
9484 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
9485 gen_bx(s, tmp);
9486 return true;
9489 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
9491 TCGv_i32 tmp;
9493 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9494 if (!ENABLE_ARCH_5) {
9495 return false;
9497 tmp = tcg_temp_new_i32();
9498 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
9499 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
9500 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
9501 gen_bx(s, tmp);
9502 return true;
9505 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
9507 TCGv_i32 addr, tmp;
9509 tmp = load_reg(s, a->rm);
9510 if (half) {
9511 tcg_gen_add_i32(tmp, tmp, tmp);
9513 addr = load_reg(s, a->rn);
9514 tcg_gen_add_i32(addr, addr, tmp);
9516 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
9517 half ? MO_UW | s->be_data : MO_UB);
9518 tcg_temp_free_i32(addr);
9520 tcg_gen_add_i32(tmp, tmp, tmp);
9521 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
9522 store_reg(s, 15, tmp);
9523 return true;
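/*
 * For TBB/TBH above, the loaded table entry is an unsigned offset in
 * halfwords: the branch target is read_pc() + 2 * entry, where the entry
 * is the byte at [Rn + Rm] (TBB) or the halfword at [Rn + 2*Rm] (TBH).
 */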
9526 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
9528 return op_tbranch(s, a, false);
9531 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
9533 return op_tbranch(s, a, true);
9536 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
9538 TCGv_i32 tmp = load_reg(s, a->rn);
9540 arm_gen_condlabel(s);
9541 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
9542 tmp, 0, s->condlabel);
9543 tcg_temp_free_i32(tmp);
9544 gen_jmp(s, read_pc(s) + a->imm);
9545 return true;
9549 * Supervisor call - both T32 & A32 come here so we need to check
9550 * which mode we are in when checking for semihosting.
9553 static bool trans_SVC(DisasContext *s, arg_SVC *a)
9555 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
9557 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
9558 #ifndef CONFIG_USER_ONLY
9559 !IS_USER(s) &&
9560 #endif
9561 (a->imm == semihost_imm)) {
9562 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
9563 } else {
9564 gen_set_pc_im(s, s->base.pc_next);
9565 s->svc_imm = a->imm;
9566 s->base.is_jmp = DISAS_SWI;
9568 return true;
9572 * Unconditional system instructions
9575 static bool trans_RFE(DisasContext *s, arg_RFE *a)
9577 static const int8_t pre_offset[4] = {
9578 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
9580 static const int8_t post_offset[4] = {
9581 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
9583 TCGv_i32 addr, t1, t2;
9585 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9586 return false;
9588 if (IS_USER(s)) {
9589 unallocated_encoding(s);
9590 return true;
9593 addr = load_reg(s, a->rn);
9594 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
9596 /* Load PC into t1 and CPSR into t2. */
9597 t1 = tcg_temp_new_i32();
9598 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
9599 tcg_gen_addi_i32(addr, addr, 4);
9600 t2 = tcg_temp_new_i32();
9601 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
9603 if (a->w) {
9604 /* Base writeback. */
9605 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
9606 store_reg(s, a->rn, addr);
9607 } else {
9608 tcg_temp_free_i32(addr);
9610 gen_rfe(s, t1, t2);
9611 return true;
9614 static bool trans_SRS(DisasContext *s, arg_SRS *a)
9616 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9617 return false;
9619 gen_srs(s, a->mode, a->pu, a->w);
9620 return true;
9623 static bool trans_CPS(DisasContext *s, arg_CPS *a)
9625 uint32_t mask, val;
9627 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9628 return false;
9630 if (IS_USER(s)) {
9631 /* Implemented as NOP in user mode. */
9632 return true;
9634 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
9636 mask = val = 0;
9637 if (a->imod & 2) {
9638 if (a->A) {
9639 mask |= CPSR_A;
9641 if (a->I) {
9642 mask |= CPSR_I;
9644 if (a->F) {
9645 mask |= CPSR_F;
9647 if (a->imod & 1) {
9648 val |= mask;
9651 if (a->M) {
9652 mask |= CPSR_M;
9653 val |= a->mode;
9655 if (mask) {
9656 gen_set_psr_im(s, mask, 0, val);
9658 return true;
9661 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
9663 TCGv_i32 tmp, addr, el;
9665 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
9666 return false;
9668 if (IS_USER(s)) {
9669 /* Implemented as NOP in user mode. */
9670 return true;
9673 tmp = tcg_const_i32(a->im);
9674 /* FAULTMASK */
9675 if (a->F) {
9676 addr = tcg_const_i32(19);
9677 gen_helper_v7m_msr(cpu_env, addr, tmp);
9678 tcg_temp_free_i32(addr);
9680 /* PRIMASK */
9681 if (a->I) {
9682 addr = tcg_const_i32(16);
9683 gen_helper_v7m_msr(cpu_env, addr, tmp);
9684 tcg_temp_free_i32(addr);
9686 el = tcg_const_i32(s->current_el);
9687 gen_helper_rebuild_hflags_m32(cpu_env, el);
9688 tcg_temp_free_i32(el);
9689 tcg_temp_free_i32(tmp);
9690 gen_lookup_tb(s);
9691 return true;
9695 * Clear-Exclusive, Barriers
9698 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
9700 if (s->thumb
9701 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
9702 : !ENABLE_ARCH_6K) {
9703 return false;
9705 gen_clrex(s);
9706 return true;
9709 static bool trans_DSB(DisasContext *s, arg_DSB *a)
9711 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
9712 return false;
9714 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9715 return true;
9718 static bool trans_DMB(DisasContext *s, arg_DMB *a)
9720 return trans_DSB(s, NULL);
9723 static bool trans_ISB(DisasContext *s, arg_ISB *a)
9725 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
9726 return false;
9729 * We need to break the TB after this insn to execute
9730 * self-modifying code correctly and also to take
9731 * any pending interrupts immediately.
9733 gen_goto_tb(s, 0, s->base.pc_next);
9734 return true;
9737 static bool trans_SB(DisasContext *s, arg_SB *a)
9739 if (!dc_isar_feature(aa32_sb, s)) {
9740 return false;
9743 * TODO: There is no speculation barrier opcode
9744 * for TCG; MB and end the TB instead.
9746 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9747 gen_goto_tb(s, 0, s->base.pc_next);
9748 return true;
9751 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
9753 if (!ENABLE_ARCH_6) {
9754 return false;
9756 if (a->E != (s->be_data == MO_BE)) {
9757 gen_helper_setend(cpu_env);
9758 s->base.is_jmp = DISAS_UPDATE;
9760 return true;
9764 * Preload instructions
9765 * All are nops, contingent on the appropriate arch level.
9768 static bool trans_PLD(DisasContext *s, arg_PLD *a)
9770 return ENABLE_ARCH_5TE;
9773 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
9775 return arm_dc_feature(s, ARM_FEATURE_V7MP);
9778 static bool trans_PLI(DisasContext *s, arg_PLD *a)
9780 return ENABLE_ARCH_7;
9784 * If-then
9787 static bool trans_IT(DisasContext *s, arg_IT *a)
9789 int cond_mask = a->cond_mask;
9792 * No actual code generated for this insn, just setup state.
9794 * Combinations of firstcond and mask which set up a 0b1111
9795 * condition are UNPREDICTABLE; we take the CONSTRAINED
9796 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9797 * i.e. both meaning "execute always".
9799 s->condexec_cond = (cond_mask >> 4) & 0xe;
9800 s->condexec_mask = cond_mask & 0x1f;
9801 return true;
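/*
 * Note how the IT state is split above: condexec_cond keeps the top
 * three bits of firstcond, while condexec_mask carries firstcond's low
 * bit followed by the 4-bit mask.  thumb_tr_translate_insn() shifts one
 * bit out of condexec_mask into the low bit of condexec_cond for each
 * insn, so every insn in the block is tested against the 3-bit base
 * condition plus its own then/else bit.
 */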
9805 * Legacy decoder.
9808 static void disas_arm_insn(DisasContext *s, unsigned int insn)
9810 unsigned int cond = insn >> 28;
9812 /* M variants do not implement ARM mode; this must raise the INVSTATE
9813 * UsageFault exception.
9815 if (arm_dc_feature(s, ARM_FEATURE_M)) {
9816 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
9817 default_exception_el(s));
9818 return;
9821 if (cond == 0xf) {
9822 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9823 * choose to UNDEF. In ARMv5 and above the space is used
9824 * for miscellaneous unconditional instructions.
9826 ARCH(5);
9828 /* Unconditional instructions. */
9829 /* TODO: Perhaps merge these into one decodetree output file. */
9830 if (disas_a32_uncond(s, insn) ||
9831 disas_vfp_uncond(s, insn) ||
9832 disas_neon_dp(s, insn) ||
9833 disas_neon_ls(s, insn) ||
9834 disas_neon_shared(s, insn)) {
9835 return;
9837 /* fall back to legacy decoder */
9839 if (((insn >> 25) & 7) == 1) {
9840 /* NEON Data processing. */
9841 if (disas_neon_data_insn(s, insn)) {
9842 goto illegal_op;
9844 return;
9846 if ((insn & 0x0e000f00) == 0x0c000100) {
9847 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9848 /* iWMMXt register transfer. */
9849 if (extract32(s->c15_cpar, 1, 1)) {
9850 if (!disas_iwmmxt_insn(s, insn)) {
9851 return;
9856 goto illegal_op;
9858 if (cond != 0xe) {
9859 /* if not always executed, generate a conditional jump to
9860 the next instruction */
9861 arm_skip_unless(s, cond);
9864 /* TODO: Perhaps merge these into one decodetree output file. */
9865 if (disas_a32(s, insn) ||
9866 disas_vfp(s, insn)) {
9867 return;
9869 /* fall back to legacy decoder */
9871 switch ((insn >> 24) & 0xf) {
9872 case 0xc:
9873 case 0xd:
9874 case 0xe:
9875 if (((insn >> 8) & 0xe) == 10) {
9876 /* VFP, but failed disas_vfp. */
9877 goto illegal_op;
9879 if (disas_coproc_insn(s, insn)) {
9880 /* Coprocessor. */
9881 goto illegal_op;
9883 break;
9884 default:
9885 illegal_op:
9886 unallocated_encoding(s);
9887 break;
9891 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9894 * Return true if this is a 16 bit instruction. We must be precise
9895 * about this (matching the decode).
9897 if ((insn >> 11) < 0x1d) {
9898 /* Definitely a 16-bit instruction */
9899 return true;
9902 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9903 * first half of a 32-bit Thumb insn. Thumb-1 cores might
9904 * end up actually treating this as two 16-bit insns, though,
9905 * if it's half of a bl/blx pair that might span a page boundary.
9907 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9908 arm_dc_feature(s, ARM_FEATURE_M)) {
9909 /* Thumb2 cores (including all M profile ones) always treat
9910 * 32-bit insns as 32-bit.
9912 return false;
9915 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9916 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9917 * is not on the next page; we merge this into a 32-bit
9918 * insn.
9920 return false;
9922 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9923 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9924 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9925 * -- handle as single 16 bit insn
9927 return true;
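/*
 * Example: 0xb580 (push {r7, lr}) has (insn >> 11) == 0x16 < 0x1d, so it
 * is a 16-bit insn; 0xf7ff (a BL/BLX prefix) has (insn >> 11) == 0x1e,
 * so it is handled by the 32-bit/prefix logic above.
 */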
9930 /* Translate a 32-bit thumb instruction. */
9931 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9934 * ARMv6-M supports a limited subset of Thumb2 instructions.
9935 * Other Thumb1 architectures allow only 32-bit
9936 * combined BL/BLX prefix and suffix.
9938 if (arm_dc_feature(s, ARM_FEATURE_M) &&
9939 !arm_dc_feature(s, ARM_FEATURE_V7)) {
9940 int i;
9941 bool found = false;
9942 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9943 0xf3b08040 /* dsb */,
9944 0xf3b08050 /* dmb */,
9945 0xf3b08060 /* isb */,
9946 0xf3e08000 /* mrs */,
9947 0xf000d000 /* bl */};
9948 static const uint32_t armv6m_mask[] = {0xffe0d000,
9949 0xfff0d0f0,
9950 0xfff0d0f0,
9951 0xfff0d0f0,
9952 0xffe0d000,
9953 0xf800d000};
9955 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9956 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9957 found = true;
9958 break;
9961 if (!found) {
9962 goto illegal_op;
9964 } else if ((insn & 0xf800e800) != 0xf000e800) {
9965 ARCH(6T2);
9968 if ((insn & 0xef000000) == 0xef000000) {
9970 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9971 * transform into
9972 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9974 uint32_t a32_insn = (insn & 0xe2ffffff) |
9975 ((insn & (1 << 28)) >> 4) | (1 << 28);
9977 if (disas_neon_dp(s, a32_insn)) {
9978 return;
9982 if ((insn & 0xff100000) == 0xf9000000) {
9984 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9985 * transform into
9986 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9988 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9990 if (disas_neon_ls(s, a32_insn)) {
9991 return;
9996 * TODO: Perhaps merge these into one decodetree output file.
9997 * Note disas_vfp is written for a32 with cond field in the
9998 * top nibble. The t32 encoding requires 0xe in the top nibble.
10000 if (disas_t32(s, insn) ||
10001 disas_vfp_uncond(s, insn) ||
10002 disas_neon_shared(s, insn) ||
10003 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10004 return;
10006 /* fall back to legacy decoder */
10008 switch ((insn >> 25) & 0xf) {
10009 case 0: case 1: case 2: case 3:
10010 /* 16-bit instructions. Should never happen. */
10011 abort();
10012 case 6: case 7: case 14: case 15:
10013 /* Coprocessor. */
10014 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10015 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10016 if (extract32(insn, 24, 2) == 3) {
10017 goto illegal_op; /* op0 = 0b11 : unallocated */
10020 if (((insn >> 8) & 0xe) == 10 &&
10021 dc_isar_feature(aa32_fpsp_v2, s)) {
10022 /* FP, and the CPU supports it */
10023 goto illegal_op;
10024 } else {
10025 /* All other insns: NOCP */
10026 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10027 syn_uncategorized(),
10028 default_exception_el(s));
10030 break;
10032 if (((insn >> 24) & 3) == 3) {
10033 /* Translate into the equivalent ARM encoding. */
10034 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10035 if (disas_neon_data_insn(s, insn)) {
10036 goto illegal_op;
10038 } else if (((insn >> 8) & 0xe) == 10) {
10039 /* VFP, but failed disas_vfp. */
10040 goto illegal_op;
10041 } else {
10042 if (insn & (1 << 28))
10043 goto illegal_op;
10044 if (disas_coproc_insn(s, insn)) {
10045 goto illegal_op;
10048 break;
10049 case 12:
10050 goto illegal_op;
10051 default:
10052 illegal_op:
10053 unallocated_encoding(s);
10057 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10059 if (!disas_t16(s, insn)) {
10060 unallocated_encoding(s);
10064 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10066 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10067 * (False positives are OK, false negatives are not.)
10068 * We know this is a Thumb insn, and our caller ensures we are
10069 * only called if dc->base.pc_next is less than 4 bytes from the page
10070 * boundary, so we cross the page if the first 16 bits indicate
10071 * that this is a 32 bit insn.
10073 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10075 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10078 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10080 DisasContext *dc = container_of(dcbase, DisasContext, base);
10081 CPUARMState *env = cs->env_ptr;
10082 ARMCPU *cpu = env_archcpu(env);
10083 uint32_t tb_flags = dc->base.tb->flags;
10084 uint32_t condexec, core_mmu_idx;
10086 dc->isar = &cpu->isar;
10087 dc->condjmp = 0;
10089 dc->aarch64 = 0;
10090 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10091 * there is no secure EL1, so we route exceptions to EL3.
10093 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10094 !arm_el_is_aa64(env, 3);
10095 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10096 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10097 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10098 dc->condexec_mask = (condexec & 0xf) << 1;
10099 dc->condexec_cond = condexec >> 4;
10101 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10102 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10103 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10104 #if !defined(CONFIG_USER_ONLY)
10105 dc->user = (dc->current_el == 0);
10106 #endif
10107 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10109 if (arm_feature(env, ARM_FEATURE_M)) {
10110 dc->vfp_enabled = 1;
10111 dc->be_data = MO_TE;
10112 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10113 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10114 regime_is_secure(env, dc->mmu_idx);
10115 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10116 dc->v8m_fpccr_s_wrong =
10117 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10118 dc->v7m_new_fp_ctxt_needed =
10119 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10120 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10121 } else {
10122 dc->be_data =
10123 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10124 dc->debug_target_el =
10125 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10126 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10127 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10128 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10129 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10130 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10131 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10132 } else {
10133 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10134 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10137 dc->cp_regs = cpu->cp_regs;
10138 dc->features = env->features;
10140 /* Single step state. The code-generation logic here is:
10141 * SS_ACTIVE == 0:
10142 * generate code with no special handling for single-stepping (except
10143 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10144 * this happens anyway because those changes are all system register or
10145 * PSTATE writes).
10146 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10147 * emit code for one insn
10148 * emit code to clear PSTATE.SS
10149 * emit code to generate software step exception for completed step
10150 * end TB (as usual for having generated an exception)
10151 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10152 * emit code to generate a software step exception
10153 * end the TB
10155 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10156 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10157 dc->is_ldex = false;
10159 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10161 /* If architectural single step active, limit to 1. */
10162 if (is_singlestepping(dc)) {
10163 dc->base.max_insns = 1;
10166 /* ARM is a fixed-length ISA. Bound the number of insns to execute
10167 to those left on the page. */
10168 if (!dc->thumb) {
10169 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10170 dc->base.max_insns = MIN(dc->base.max_insns, bound);
10173 cpu_V0 = tcg_temp_new_i64();
10174 cpu_V1 = tcg_temp_new_i64();
10175 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
10176 cpu_M0 = tcg_temp_new_i64();
10179 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
10181 DisasContext *dc = container_of(dcbase, DisasContext, base);
10183 /* A note on handling of the condexec (IT) bits:
10185 * We want to avoid the overhead of having to write the updated condexec
10186 * bits back to the CPUARMState for every instruction in an IT block. So:
10187 * (1) if the condexec bits are not already zero then we write
10188 * zero back into the CPUARMState now. This avoids complications trying
10189 * to do it at the end of the block. (For example if we don't do this
10190 * it's hard to identify whether we can safely skip writing condexec
10191 * at the end of the TB, which we definitely want to do for the case
10192 * where a TB doesn't do anything with the IT state at all.)
10193 * (2) if we are going to leave the TB then we call gen_set_condexec()
10194 * which will write the correct value into CPUARMState if zero is wrong.
10195 * This is done both for leaving the TB at the end, and for leaving
10196 * it because of an exception we know will happen, which is done in
10197 * gen_exception_insn(). The latter is necessary because we need to
10198 * leave the TB with the PC/IT state just prior to execution of the
10199 * instruction which caused the exception.
10200 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
10201 * then the CPUARMState will be wrong and we need to reset it.
10202 * This is handled in the same way as restoration of the
10203 * PC in these situations; we save the value of the condexec bits
10204 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10205 * then uses this to restore them after an exception.
10207 * Note that there are no instructions which can read the condexec
10208 * bits, and none which can write non-static values to them, so
10209 * we don't need to care about whether CPUARMState is correct in the
10210 * middle of a TB.
10213 /* Reset the conditional execution bits immediately. This avoids
10214 complications trying to do it at the end of the block. */
10215 if (dc->condexec_mask || dc->condexec_cond) {
10216 TCGv_i32 tmp = tcg_temp_new_i32();
10217 tcg_gen_movi_i32(tmp, 0);
10218 store_cpu_field(tmp, condexec_bits);
10222 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10224 DisasContext *dc = container_of(dcbase, DisasContext, base);
10226 tcg_gen_insn_start(dc->base.pc_next,
10227 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
10229 dc->insn_start = tcg_last_op();
10232 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
10233 const CPUBreakpoint *bp)
10235 DisasContext *dc = container_of(dcbase, DisasContext, base);
10237 if (bp->flags & BP_CPU) {
10238 gen_set_condexec(dc);
10239 gen_set_pc_im(dc, dc->base.pc_next);
10240 gen_helper_check_breakpoints(cpu_env);
10241 /* End the TB early; it's likely not going to be executed */
10242 dc->base.is_jmp = DISAS_TOO_MANY;
10243 } else {
10244 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10245 /* The address covered by the breakpoint must be
10246 included in [tb->pc, tb->pc + tb->size) in order
10247 to for it to be properly cleared -- thus we
10248 increment the PC here so that the logic setting
10249 tb->size below does the right thing. */
10250 /* TODO: Advance PC by correct instruction length to
10251 * avoid disassembler error messages */
10252 dc->base.pc_next += 2;
10253 dc->base.is_jmp = DISAS_NORETURN;
10256 return true;
10259 static bool arm_pre_translate_insn(DisasContext *dc)
10261 #ifdef CONFIG_USER_ONLY
10262 /* Intercept jump to the magic kernel page. */
10263 if (dc->base.pc_next >= 0xffff0000) {
10264 /* We always get here via a jump, so we know we are not in a
10265 conditional execution block. */
10266 gen_exception_internal(EXCP_KERNEL_TRAP);
10267 dc->base.is_jmp = DISAS_NORETURN;
10268 return true;
10270 #endif
10272 if (dc->ss_active && !dc->pstate_ss) {
10273 /* Singlestep state is Active-pending.
10274 * If we're in this state at the start of a TB then either
10275 * a) we just took an exception to an EL which is being debugged
10276 * and this is the first insn in the exception handler
10277 * b) debug exceptions were masked and we just unmasked them
10278 * without changing EL (eg by clearing PSTATE.D)
10279 * In either case we're going to take a swstep exception in the
10280 * "did not step an insn" case, and so the syndrome ISV and EX
10281 * bits should be zero.
10283 assert(dc->base.num_insns == 1);
10284 gen_swstep_exception(dc, 0, 0);
10285 dc->base.is_jmp = DISAS_NORETURN;
10286 return true;
10289 return false;
10292 static void arm_post_translate_insn(DisasContext *dc)
10294 if (dc->condjmp && !dc->base.is_jmp) {
10295 gen_set_label(dc->condlabel);
10296 dc->condjmp = 0;
10298 translator_loop_temp_check(&dc->base);
10301 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10303 DisasContext *dc = container_of(dcbase, DisasContext, base);
10304 CPUARMState *env = cpu->env_ptr;
10305 unsigned int insn;
10307 if (arm_pre_translate_insn(dc)) {
10308 return;
10311 dc->pc_curr = dc->base.pc_next;
10312 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
10313 dc->insn = insn;
10314 dc->base.pc_next += 4;
10315 disas_arm_insn(dc, insn);
10317 arm_post_translate_insn(dc);
10319 /* ARM is a fixed-length ISA. We performed the cross-page check
10320 in init_disas_context by adjusting max_insns. */
10323 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
10325 /* Return true if this Thumb insn is always unconditional,
10326 * even inside an IT block. This is true of only a very few
10327 * instructions: BKPT, HLT, and SG.
10329 * A larger class of instructions are UNPREDICTABLE if used
10330 * inside an IT block; we do not need to detect those here, because
10331 * what we do by default (perform the cc check and update the IT
10332 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
10333 * choice for those situations.
10335 * insn is either a 16-bit or a 32-bit instruction; the two are
10336 * distinguishable because for the 16-bit case the top 16 bits
10337 * are zeroes, and that isn't a valid 32-bit encoding.
10339 if ((insn & 0xffffff00) == 0xbe00) {
10340 /* BKPT */
10341 return true;
10344 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
10345 !arm_dc_feature(s, ARM_FEATURE_M)) {
10346 /* HLT: v8A only. This is unconditional even when it is going to
10347 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
10348 * For v7 cores this was a plain old undefined encoding and so
10349 * honours its cc check. (We might be using the encoding as
10350 * a semihosting trap, but we don't change the cc check behaviour
10351 * on that account, because a debugger connected to a real v7A
10352 * core and emulating semihosting traps by catching the UNDEF
10353 * exception would also only see cases where the cc check passed.
10354 * No guest code should be trying to do a HLT semihosting trap
10355 * in an IT block anyway.
10357 return true;
10360 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
10361 arm_dc_feature(s, ARM_FEATURE_M)) {
10362 /* SG: v8M only */
10363 return true;
10366 return false;
10369 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10371 DisasContext *dc = container_of(dcbase, DisasContext, base);
10372 CPUARMState *env = cpu->env_ptr;
10373 uint32_t insn;
10374 bool is_16bit;
10376 if (arm_pre_translate_insn(dc)) {
10377 return;
10380 dc->pc_curr = dc->base.pc_next;
10381 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10382 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
10383 dc->base.pc_next += 2;
10384 if (!is_16bit) {
10385 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10387 insn = insn << 16 | insn2;
10388 dc->base.pc_next += 2;
10390 dc->insn = insn;
10392 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
10393 uint32_t cond = dc->condexec_cond;
10396 * Conditionally skip the insn. Note that both 0xe and 0xf mean
10397 * "always"; 0xf is not "never".
10399 if (cond < 0x0e) {
10400 arm_skip_unless(dc, cond);
10404 if (is_16bit) {
10405 disas_thumb_insn(dc, insn);
10406 } else {
10407 disas_thumb2_insn(dc, insn);
10410 /* Advance the Thumb condexec condition. */
10411 if (dc->condexec_mask) {
10412 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
10413 ((dc->condexec_mask >> 4) & 1));
10414 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
10415 if (dc->condexec_mask == 0) {
10416 dc->condexec_cond = 0;
10420 arm_post_translate_insn(dc);
10422 /* Thumb is a variable-length ISA. Stop translation when the next insn
10423 * will touch a new page. This ensures that prefetch aborts occur at
10424 * the right place.
10426 * We want to stop the TB if the next insn starts in a new page,
10427 * or if it spans between this page and the next. This means that
10428 * if we're looking at the last halfword in the page we need to
10429 * see if it's a 16-bit Thumb insn (which will fit in this TB)
10430 * or a 32-bit Thumb insn (which won't).
10431 * This is to avoid generating a silly TB with a single 16-bit insn
10432 * in it at the end of this page (which would execute correctly
10433 * but isn't very efficient).
10435 if (dc->base.is_jmp == DISAS_NEXT
10436 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
10437 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
10438 && insn_crosses_page(env, dc)))) {
10439 dc->base.is_jmp = DISAS_TOO_MANY;
10443 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10445 DisasContext *dc = container_of(dcbase, DisasContext, base);
10447 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
10448 /* FIXME: This can theoretically happen with self-modifying code. */
10449 cpu_abort(cpu, "IO on conditional branch instruction");
10452 /* At this stage dc->condjmp will only be set when the skipped
10453 instruction was a conditional branch or trap, and the PC has
10454 already been written. */
10455 gen_set_condexec(dc);
10456 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
10457 /* Exception return branches need some special case code at the
10458 * end of the TB, which is complex enough that it has to
10459 * handle the single-step vs not and the condition-failed
10460 * insn codepath itself.
10462 gen_bx_excret_final_code(dc);
10463 } else if (unlikely(is_singlestepping(dc))) {
10464 /* Unconditional and "condition passed" instruction codepath. */
10465 switch (dc->base.is_jmp) {
10466 case DISAS_SWI:
10467 gen_ss_advance(dc);
10468 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
10469 default_exception_el(dc));
10470 break;
10471 case DISAS_HVC:
10472 gen_ss_advance(dc);
10473 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
10474 break;
10475 case DISAS_SMC:
10476 gen_ss_advance(dc);
10477 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
10478 break;
10479 case DISAS_NEXT:
10480 case DISAS_TOO_MANY:
10481 case DISAS_UPDATE:
10482 gen_set_pc_im(dc, dc->base.pc_next);
10483 /* fall through */
10484 default:
10485 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
10486 gen_singlestep_exception(dc);
10487 break;
10488 case DISAS_NORETURN:
10489 break;
10491 } else {
10492 /* While branches must always occur at the end of an IT block,
10493 there are a few other things that can cause us to terminate
10494 the TB in the middle of an IT block:
10495 - Exception generating instructions (bkpt, swi, undefined).
10496 - Page boundaries.
10497 - Hardware watchpoints.
10498 Hardware breakpoints have already been handled and skip this code.
10500 switch(dc->base.is_jmp) {
10501 case DISAS_NEXT:
10502 case DISAS_TOO_MANY:
10503 gen_goto_tb(dc, 1, dc->base.pc_next);
10504 break;
10505 case DISAS_JUMP:
10506 gen_goto_ptr();
10507 break;
10508 case DISAS_UPDATE:
10509 gen_set_pc_im(dc, dc->base.pc_next);
10510 /* fall through */
10511 default:
10512 /* indicate that the hash table must be used to find the next TB */
10513 tcg_gen_exit_tb(NULL, 0);
10514 break;
10515 case DISAS_NORETURN:
10516 /* nothing more to generate */
10517 break;
10518 case DISAS_WFI:
10520 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
10521 !(dc->insn & (1U << 31))) ? 2 : 4);
10523 gen_helper_wfi(cpu_env, tmp);
10524 tcg_temp_free_i32(tmp);
10525 /* The helper doesn't necessarily throw an exception, but we
10526 * must go back to the main loop to check for interrupts anyway.
10528 tcg_gen_exit_tb(NULL, 0);
10529 break;
10531 case DISAS_WFE:
10532 gen_helper_wfe(cpu_env);
10533 break;
10534 case DISAS_YIELD:
10535 gen_helper_yield(cpu_env);
10536 break;
10537 case DISAS_SWI:
10538 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
10539 default_exception_el(dc));
10540 break;
10541 case DISAS_HVC:
10542 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
10543 break;
10544 case DISAS_SMC:
10545 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
10546 break;
10550 if (dc->condjmp) {
10551 /* "Condition failed" instruction codepath for the branch/trap insn */
10552 gen_set_label(dc->condlabel);
10553 gen_set_condexec(dc);
10554 if (unlikely(is_singlestepping(dc))) {
10555 gen_set_pc_im(dc, dc->base.pc_next);
10556 gen_singlestep_exception(dc);
10557 } else {
10558 gen_goto_tb(dc, 1, dc->base.pc_next);
10563 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
10565 DisasContext *dc = container_of(dcbase, DisasContext, base);
10567 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
10568 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
10571 static const TranslatorOps arm_translator_ops = {
10572 .init_disas_context = arm_tr_init_disas_context,
10573 .tb_start = arm_tr_tb_start,
10574 .insn_start = arm_tr_insn_start,
10575 .breakpoint_check = arm_tr_breakpoint_check,
10576 .translate_insn = arm_tr_translate_insn,
10577 .tb_stop = arm_tr_tb_stop,
10578 .disas_log = arm_tr_disas_log,
10581 static const TranslatorOps thumb_translator_ops = {
10582 .init_disas_context = arm_tr_init_disas_context,
10583 .tb_start = arm_tr_tb_start,
10584 .insn_start = arm_tr_insn_start,
10585 .breakpoint_check = arm_tr_breakpoint_check,
10586 .translate_insn = thumb_tr_translate_insn,
10587 .tb_stop = arm_tr_tb_stop,
10588 .disas_log = arm_tr_disas_log,
10591 /* generate intermediate code for basic block 'tb'. */
10592 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
10594 DisasContext dc = { };
10595 const TranslatorOps *ops = &arm_translator_ops;
10597 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
10598 ops = &thumb_translator_ops;
10600 #ifdef TARGET_AARCH64
10601 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
10602 ops = &aarch64_translator_ops;
10604 #endif
10606 translator_loop(ops, &dc.base, cpu, tb, max_insns);
10609 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
10610 target_ulong *data)
10612 if (is_a64(env)) {
10613 env->pc = data[0];
10614 env->condexec_bits = 0;
10615 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
10616 } else {
10617 env->regs[15] = data[0];
10618 env->condexec_bits = data[1];
10619 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;