target/arm: Remove unused GEN_NEON_INTEGER_OP macro
[qemu/ar7.git] / target/arm/translate.c
1 /*
2 * ARM translation
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
38 #include "exec/log.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
54 #include "translate.h"
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 #include "exec/gen-icount.h"
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fixed point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
84 int i;
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
101 a64_translate_init();
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
131 return;
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
139 return;
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
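/*
 * For illustration: a typical load/store emitter would pack the transfer
 * register into the low ISS bits and OR in the flags it needs, along the
 * lines of
 *
 *     disas_set_da_iss(s, MO_UW, rt | ISSIsWrite | ISSIs16Bit);
 *
 * for a hypothetical 16-bit store of register rt, or pass ISSInvalid when
 * no valid syndrome can be reported (e.g. for a writeback form).
 */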
147 static inline int get_a32_user_mem_index(DisasContext *s)
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
182 static inline TCGv_i32 load_cpu_offset(int offset)
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
203 return s->pc_curr + (s->thumb ? 4 : 8);
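/*
 * Worked example: for an ARM-state insn at 0x00001000 read_pc() returns
 * 0x00001008, and for a Thumb insn at the same address it returns
 * 0x00001004, matching the architectural rule that reads of the PC see
 * the current insn address plus 8 (ARM) or plus 4 (Thumb).
 */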
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
231 TCGv_i32 tmp = tcg_temp_new_i32();
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
238 return tmp;
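/*
 * Worked example: for a Thumb literal load such as "LDR r0, [pc, #8]" at
 * address 0x1002, read_pc() gives 0x1006, Align(PC, 4) gives 0x1004, and
 * add_reg_for_lit(s, 15, 8) therefore yields 0x100c.
 */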
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
271 #endif
272 store_reg(s, 13, var);
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
294 static void gen_exception_internal(int excp)
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
303 static void gen_step_complete_exception(DisasContext *s)
305 /* We have just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
319 static void gen_singlestep_exception(DisasContext *s)
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
332 static inline bool is_singlestepping(DisasContext *s)
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
340 return s->base.singlestep_enabled || s->ss_active;
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
396 return ret;
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
414 return ret;
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
420 tcg_gen_rotri_i32(var, var, 16);
423 /* Dual 16-bit add. Result placed in dest. For each halfword this computes:
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
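/*
 * Worked example: gen_add16 on t0 = 0x0000ffff, t1 = 0x00000001 produces
 * 0x00000000, whereas a plain 32-bit add would give 0x00010000 by letting
 * the low-halfword carry leak into the high halfword; masking out bit 15
 * and re-inserting it with XOR is what blocks that carry propagation.
 */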
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
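/*
 * Note on the add2 path above: the carry accumulates across the two steps,
 * first NF:CF = t0 + CF, then NF:CF = NF + t1 plus the pending carry, so
 * cpu_CF ends up holding the carry-out of t0 + t1 + CF. The 64-bit
 * fallback computes the same sum zero-extended and splits bit 32 back out
 * into cpu_CF via tcg_gen_extr_i64_i32().
 */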
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
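/*
 * For illustration, GEN_SHIFT(shl) above expands to a gen_shl() with the
 * ARM register-controlled shift semantics: only the low byte of t1 is
 * consulted, and a shift amount of 32 or more yields 0, i.e. roughly
 *
 *     dest = (t1 & 0xff) > 31 ? 0 : t0 << (t1 & 0x1f);
 */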
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
561 static void shifter_out_im(TCGv_i32 var, int shift)
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
641 tcg_temp_free_i32(shift);
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
648 void arm_test_cc(DisasCompare *cmp, int cc)
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
734 void arm_free_cc(DisasCompare *cmp)
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
746 void arm_gen_test_cc(int cc, TCGLabel *label)
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
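/*
 * Usage sketch: a caller that wants to skip generated code when condition
 * "cc" does not hold would typically branch on the inverted condition:
 *
 *     TCGLabel *skip = gen_new_label();
 *     arm_gen_test_cc(cc ^ 1, skip);    // taken when cc is false
 *     ... emit the conditional body ...
 *     gen_set_label(skip);
 */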
754 static inline void gen_set_condexec(DisasContext *s)
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
766 tcg_gen_movi_i32(cpu_R[15], val);
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
811 #endif
814 static inline void gen_bx_excret_final_code(DisasContext *s)
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
852 static inline void gen_bxns(DisasContext *s, int rm)
854 TCGv_i32 var = load_reg(s, rm);
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
871 static inline void gen_blxns(DisasContext *s, int rm)
873 TCGv_i32 var = load_reg(s, rm);
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
933 return addr;
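/*
 * For illustration: the XOR above implements the BE32 (SCTLR.B) address
 * munging for sub-word accesses. With sctlr_b set, a byte access (MO_8)
 * to address 0x1001 becomes an access to 0x1001 ^ 3 = 0x1002, so that
 * byte lanes line up with a big-endian word layout.
 */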
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
939 TCGv addr;
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
954 TCGv addr;
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1017 tcg_temp_free(addr);
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
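/*
 * For illustration, DO_GEN_LD(8u, MO_UB) above expands to
 *
 *     static inline void gen_aa32_ld8u(DisasContext *s, TCGv_i32 val,
 *                                      TCGv_i32 a32, int index)
 *     {
 *         gen_aa32_ld_i32(s, val, a32, index, MO_UB | s->be_data);
 *     }
 *
 * i.e. a zero-extending byte load that picks up the CPU's data endianness
 * from s->be_data; the other DO_GEN_LD/DO_GEN_ST lines follow the same
 * pattern for the remaining widths.
 */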
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1051 static inline void gen_smc(DisasContext *s)
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1056 TCGv_i32 tmp;
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1085 TCGv_i32 tcg_syn;
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1095 static void unallocated_encoding(DisasContext *s)
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1109 static inline void gen_hlt(DisasContext *s, int imm)
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1132 unallocated_encoding(s);
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1159 return ofs;
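/*
 * Worked example: for the single-precision register s5 (dp == false,
 * reg == 5) this resolves to zregs[1].d[0] plus the offset of l.upper,
 * i.e. the architecturally upper word of d2, which is where s5 lives in
 * the shared VFP/Neon/SVE register file.
 */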
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
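/*
 * Worked example for the big-endian host case: with MO_16 elements
 * (element_size == 2), element 0 gets ofs 0 ^ 6 = 6 and element 3 gets
 * 6 ^ 6 = 0; the XOR reverses the element order within each 8-byte unit
 * so that architectural element numbers map onto the correct bytes of the
 * host-order 64-bit unit.
 */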
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1248 long offset = neon_element_offset(reg, ele, size);
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1267 long offset = neon_element_offset(reg, ele, size);
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1314 #define ARM_CP_RW_BIT (1 << 20)
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1345 iwmmxt_store_reg(cpu_M0, rn);
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1350 iwmmxt_load_reg(cpu_M0, rn);
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
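/*
 * For illustration, each IWMMXT_OP(...) line above expands to a small
 * wrapper; IWMMXT_OP(maddsq), for example, becomes
 *
 *     static inline void gen_op_iwmmxt_maddsq_M0_wRn(int rn)
 *     {
 *         iwmmxt_load_reg(cpu_V1, rn);
 *         gen_helper_iwmmxt_maddsq(cpu_M0, cpu_M0, cpu_V1);
 *     }
 *
 * while the _ENV variants additionally pass cpu_env to the helper and the
 * _SIZE variants instantiate the b/w/l forms in one go.
 */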
1451 static void gen_op_iwmmxt_set_mup(void)
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459 static void gen_op_iwmmxt_set_cup(void)
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1561 return 0;
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1623 tcg_temp_free_i32(tmp);
1625 tcg_temp_free_i32(addr);
1626 return 0;
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2540 return 0;
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
 2544    (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2556 if (acc != 0)
2557 return 1;
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2594 if (acc != 0)
2595 return 1;
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2606 return 0;
2609 return 1;
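/*
 * Decode a VFP/Neon D-register field: the low four bits come from "bigbit"
 * and the fifth bit from "smallbit". When the 32-double-register extension
 * (aa32_simd_r32) is not present, a set "smallbit" makes the containing
 * function return 1 (UNDEF).
 */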
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
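/*
 * Direct block chaining (goto_tb) is only used when the branch target lies
 * on the same guest page as this TB's start or as the last byte of the
 * current instruction; for user-only emulation there is no restriction.
 */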
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2655 static void gen_goto_ptr(void)
2657 tcg_gen_lookup_and_goto_ptr();
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
 2662  * enter the next TB.
 2663  */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2674 s->base.is_jmp = DISAS_NORETURN;
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
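/*
 * 16x16->32 signed multiply for the halfword-multiply family: x and y pick
 * the top (1) or bottom (0) halfword of t0 and t1 before multiplying.
 */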
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
 2701 /* Return the mask of PSR bits set by an MSR instruction.  */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2704 uint32_t mask = 0;
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2731 return mask;
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
 2781  * that EL3 is AArch64) must trap to EL3.
 2782  *
 2783  * If the access checks fail this function will emit code to take
 2784  * an exception and return false. Otherwise it will return true,
 2785  * and set *tgtmode and *regno appropriately.
 2786  */
2787 int exc_target = default_exception_el(s);
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
 2790      * Virtualization Extensions.
 2791      */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
 2802      * of registers into (r, sysm).
 2803      */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2874 /* Catch the 'accessing inaccessible register' cases we can detect
 2875      * at translate time.
 2876      */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
 2884              * then accesses to Mon registers trap to EL3
 2885              */
2886 exc_target = 3;
2887 goto undef;
2889 break;
2890 case ARM_CPU_MODE_HYP:
 2891         /*
 2892          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
 2893          * (and so we can forbid accesses from EL2 or below). elr_hyp
 2894          * can be accessed also from Hyp mode, so forbid accesses from
 2895          * EL0 or EL1.
 2896          */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2901 break;
2902 default:
2903 break;
2906 return true;
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
 2959 /* Store value to PC as for an exception return (i.e. don't
 2960  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
 2961  * will do the masking based on the new value of the Thumb bit.
 2962  */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
 2975      * be called after storing the new PC.
 2976      */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
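/*
 * Emit the cpu_env-taking Neon helper for an integer op, selecting the
 * signed/unsigned 8/16/32-bit variant from ((size << 1) | u); any other
 * encoding makes the caller return 1 (UNDEF).
 */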
3014 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3015 switch ((size << 1) | u) { \
3016 case 0: \
3017 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3018 break; \
3019 case 1: \
3020 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3021 break; \
3022 case 2: \
3023 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 3: \
3026 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 4: \
3029 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 5: \
3032 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 default: return 1; \
3035 }} while (0)
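/*
 * The two helpers below spill/reload a 32-bit value via the vfp.scratch[]
 * array in CPUARMState; neon_store_scratch also frees its source temp.
 */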
3037 static TCGv_i32 neon_load_scratch(int scratch)
3039 TCGv_i32 tmp = tcg_temp_new_i32();
3040 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3041 return tmp;
3044 static void neon_store_scratch(int scratch, TCGv_i32 var)
3046 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3047 tcg_temp_free_i32(var);
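/*
 * Load the scalar operand for a by-element operation: 32-bit scalars are
 * returned as-is, while for 16-bit scalars the selected halfword is
 * duplicated into both halves of the returned value.
 */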
3050 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3052 TCGv_i32 tmp;
3053 if (size == 1) {
3054 tmp = neon_load_reg(reg & 7, reg >> 4);
3055 if (reg & 8) {
3056 gen_neon_dup_high16(tmp);
3057 } else {
3058 gen_neon_dup_low16(tmp);
3060 } else {
3061 tmp = neon_load_reg(reg & 15, reg >> 4);
3063 return tmp;
3066 static int gen_neon_unzip(int rd, int rm, int size, int q)
3068 TCGv_ptr pd, pm;
3070 if (!q && size == 2) {
3071 return 1;
3073 pd = vfp_reg_ptr(true, rd);
3074 pm = vfp_reg_ptr(true, rm);
3075 if (q) {
3076 switch (size) {
3077 case 0:
3078 gen_helper_neon_qunzip8(pd, pm);
3079 break;
3080 case 1:
3081 gen_helper_neon_qunzip16(pd, pm);
3082 break;
3083 case 2:
3084 gen_helper_neon_qunzip32(pd, pm);
3085 break;
3086 default:
3087 abort();
3089 } else {
3090 switch (size) {
3091 case 0:
3092 gen_helper_neon_unzip8(pd, pm);
3093 break;
3094 case 1:
3095 gen_helper_neon_unzip16(pd, pm);
3096 break;
3097 default:
3098 abort();
3101 tcg_temp_free_ptr(pd);
3102 tcg_temp_free_ptr(pm);
3103 return 0;
3106 static int gen_neon_zip(int rd, int rm, int size, int q)
3108 TCGv_ptr pd, pm;
3110 if (!q && size == 2) {
3111 return 1;
3113 pd = vfp_reg_ptr(true, rd);
3114 pm = vfp_reg_ptr(true, rm);
3115 if (q) {
3116 switch (size) {
3117 case 0:
3118 gen_helper_neon_qzip8(pd, pm);
3119 break;
3120 case 1:
3121 gen_helper_neon_qzip16(pd, pm);
3122 break;
3123 case 2:
3124 gen_helper_neon_qzip32(pd, pm);
3125 break;
3126 default:
3127 abort();
3129 } else {
3130 switch (size) {
3131 case 0:
3132 gen_helper_neon_zip8(pd, pm);
3133 break;
3134 case 1:
3135 gen_helper_neon_zip16(pd, pm);
3136 break;
3137 default:
3138 abort();
3141 tcg_temp_free_ptr(pd);
3142 tcg_temp_free_ptr(pm);
3143 return 0;
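/* Lane-transposition helpers used for VTRN on 8-bit and 16-bit elements. */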
3146 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3148 TCGv_i32 rd, tmp;
3150 rd = tcg_temp_new_i32();
3151 tmp = tcg_temp_new_i32();
3153 tcg_gen_shli_i32(rd, t0, 8);
3154 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3155 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3156 tcg_gen_or_i32(rd, rd, tmp);
3158 tcg_gen_shri_i32(t1, t1, 8);
3159 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3160 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3161 tcg_gen_or_i32(t1, t1, tmp);
3162 tcg_gen_mov_i32(t0, rd);
3164 tcg_temp_free_i32(tmp);
3165 tcg_temp_free_i32(rd);
3168 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3170 TCGv_i32 rd, tmp;
3172 rd = tcg_temp_new_i32();
3173 tmp = tcg_temp_new_i32();
3175 tcg_gen_shli_i32(rd, t0, 16);
3176 tcg_gen_andi_i32(tmp, t1, 0xffff);
3177 tcg_gen_or_i32(rd, rd, tmp);
3178 tcg_gen_shri_i32(t1, t1, 16);
3179 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3180 tcg_gen_or_i32(t1, t1, tmp);
3181 tcg_gen_mov_i32(t0, rd);
3183 tcg_temp_free_i32(tmp);
3184 tcg_temp_free_i32(rd);
3187 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3189 switch (size) {
3190 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3191 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3192 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3193 default: abort();
3197 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3199 switch (size) {
3200 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3201 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3202 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3203 default: abort();
3207 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3209 switch (size) {
3210 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3211 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3212 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3213 default: abort();
3217 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3219 switch (size) {
3220 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3221 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3222 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3223 default: abort();
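/*
 * Per-element shift used by the narrowing shift operations: q selects the
 * rounding variant and u the unsigned variant of the 16/32-bit shift.
 */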
3227 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3228 int q, int u)
3230 if (q) {
3231 if (u) {
3232 switch (size) {
3233 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3234 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3235 default: abort();
3237 } else {
3238 switch (size) {
3239 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3240 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3241 default: abort();
3244 } else {
3245 if (u) {
3246 switch (size) {
3247 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3248 case 2: gen_ushl_i32(var, var, shift); break;
3249 default: abort();
3251 } else {
3252 switch (size) {
3253 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3254 case 2: gen_sshl_i32(var, var, shift); break;
3255 default: abort();
3261 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3263 if (u) {
3264 switch (size) {
3265 case 0: gen_helper_neon_widen_u8(dest, src); break;
3266 case 1: gen_helper_neon_widen_u16(dest, src); break;
3267 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3268 default: abort();
3270 } else {
3271 switch (size) {
3272 case 0: gen_helper_neon_widen_s8(dest, src); break;
3273 case 1: gen_helper_neon_widen_s16(dest, src); break;
3274 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3275 default: abort();
3278 tcg_temp_free_i32(src);
3281 static inline void gen_neon_addl(int size)
3283 switch (size) {
3284 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3285 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3286 case 2: tcg_gen_add_i64(CPU_V001); break;
3287 default: abort();
3291 static inline void gen_neon_subl(int size)
3293 switch (size) {
3294 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3295 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3296 case 2: tcg_gen_sub_i64(CPU_V001); break;
3297 default: abort();
3301 static inline void gen_neon_negl(TCGv_i64 var, int size)
3303 switch (size) {
3304 case 0: gen_helper_neon_negl_u16(var, var); break;
3305 case 1: gen_helper_neon_negl_u32(var, var); break;
3306 case 2:
3307 tcg_gen_neg_i64(var, var);
3308 break;
3309 default: abort();
3313 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3315 switch (size) {
3316 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3317 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3318 default: abort();
3322 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3323 int size, int u)
3325 TCGv_i64 tmp;
3327 switch ((size << 1) | u) {
3328 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3329 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3330 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3331 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3332 case 4:
3333 tmp = gen_muls_i64_i32(a, b);
3334 tcg_gen_mov_i64(dest, tmp);
3335 tcg_temp_free_i64(tmp);
3336 break;
3337 case 5:
3338 tmp = gen_mulu_i64_i32(a, b);
3339 tcg_gen_mov_i64(dest, tmp);
3340 tcg_temp_free_i64(tmp);
3341 break;
3342 default: abort();
3345 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3346 Don't forget to clean them now. */
3347 if (size < 2) {
3348 tcg_temp_free_i32(a);
3349 tcg_temp_free_i32(b);
3353 static void gen_neon_narrow_op(int op, int u, int size,
3354 TCGv_i32 dest, TCGv_i64 src)
3356 if (op) {
3357 if (u) {
3358 gen_neon_unarrow_sats(size, dest, src);
3359 } else {
3360 gen_neon_narrow(size, dest, src);
3362 } else {
3363 if (u) {
3364 gen_neon_narrow_satu(size, dest, src);
3365 } else {
3366 gen_neon_narrow_sats(size, dest, src);
3371 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3372 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
 3373  * table A7-13.
 3374  */
3375 #define NEON_2RM_VREV64 0
3376 #define NEON_2RM_VREV32 1
3377 #define NEON_2RM_VREV16 2
3378 #define NEON_2RM_VPADDL 4
3379 #define NEON_2RM_VPADDL_U 5
3380 #define NEON_2RM_AESE 6 /* Includes AESD */
3381 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3382 #define NEON_2RM_VCLS 8
3383 #define NEON_2RM_VCLZ 9
3384 #define NEON_2RM_VCNT 10
3385 #define NEON_2RM_VMVN 11
3386 #define NEON_2RM_VPADAL 12
3387 #define NEON_2RM_VPADAL_U 13
3388 #define NEON_2RM_VQABS 14
3389 #define NEON_2RM_VQNEG 15
3390 #define NEON_2RM_VCGT0 16
3391 #define NEON_2RM_VCGE0 17
3392 #define NEON_2RM_VCEQ0 18
3393 #define NEON_2RM_VCLE0 19
3394 #define NEON_2RM_VCLT0 20
3395 #define NEON_2RM_SHA1H 21
3396 #define NEON_2RM_VABS 22
3397 #define NEON_2RM_VNEG 23
3398 #define NEON_2RM_VCGT0_F 24
3399 #define NEON_2RM_VCGE0_F 25
3400 #define NEON_2RM_VCEQ0_F 26
3401 #define NEON_2RM_VCLE0_F 27
3402 #define NEON_2RM_VCLT0_F 28
3403 #define NEON_2RM_VABS_F 30
3404 #define NEON_2RM_VNEG_F 31
3405 #define NEON_2RM_VSWP 32
3406 #define NEON_2RM_VTRN 33
3407 #define NEON_2RM_VUZP 34
3408 #define NEON_2RM_VZIP 35
3409 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3410 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3411 #define NEON_2RM_VSHLL 38
3412 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3413 #define NEON_2RM_VRINTN 40
3414 #define NEON_2RM_VRINTX 41
3415 #define NEON_2RM_VRINTA 42
3416 #define NEON_2RM_VRINTZ 43
3417 #define NEON_2RM_VCVT_F16_F32 44
3418 #define NEON_2RM_VRINTM 45
3419 #define NEON_2RM_VCVT_F32_F16 46
3420 #define NEON_2RM_VRINTP 47
3421 #define NEON_2RM_VCVTAU 48
3422 #define NEON_2RM_VCVTAS 49
3423 #define NEON_2RM_VCVTNU 50
3424 #define NEON_2RM_VCVTNS 51
3425 #define NEON_2RM_VCVTPU 52
3426 #define NEON_2RM_VCVTPS 53
3427 #define NEON_2RM_VCVTMU 54
3428 #define NEON_2RM_VCVTMS 55
3429 #define NEON_2RM_VRECPE 56
3430 #define NEON_2RM_VRSQRTE 57
3431 #define NEON_2RM_VRECPE_F 58
3432 #define NEON_2RM_VRSQRTE_F 59
3433 #define NEON_2RM_VCVT_FS 60
3434 #define NEON_2RM_VCVT_FU 61
3435 #define NEON_2RM_VCVT_SF 62
3436 #define NEON_2RM_VCVT_UF 63
3438 static bool neon_2rm_is_v8_op(int op)
3440 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3441 switch (op) {
3442 case NEON_2RM_VRINTN:
3443 case NEON_2RM_VRINTA:
3444 case NEON_2RM_VRINTM:
3445 case NEON_2RM_VRINTP:
3446 case NEON_2RM_VRINTZ:
3447 case NEON_2RM_VRINTX:
3448 case NEON_2RM_VCVTAU:
3449 case NEON_2RM_VCVTAS:
3450 case NEON_2RM_VCVTNU:
3451 case NEON_2RM_VCVTNS:
3452 case NEON_2RM_VCVTPU:
3453 case NEON_2RM_VCVTPS:
3454 case NEON_2RM_VCVTMU:
3455 case NEON_2RM_VCVTMS:
3456 return true;
3457 default:
3458 return false;
3462 /* Each entry in this array has bit n set if the insn allows
3463 * size value n (otherwise it will UNDEF). Since unallocated
 3464  * op values will have no bits set they always UNDEF.
 3465  */
3466 static const uint8_t neon_2rm_sizes[] = {
3467 [NEON_2RM_VREV64] = 0x7,
3468 [NEON_2RM_VREV32] = 0x3,
3469 [NEON_2RM_VREV16] = 0x1,
3470 [NEON_2RM_VPADDL] = 0x7,
3471 [NEON_2RM_VPADDL_U] = 0x7,
3472 [NEON_2RM_AESE] = 0x1,
3473 [NEON_2RM_AESMC] = 0x1,
3474 [NEON_2RM_VCLS] = 0x7,
3475 [NEON_2RM_VCLZ] = 0x7,
3476 [NEON_2RM_VCNT] = 0x1,
3477 [NEON_2RM_VMVN] = 0x1,
3478 [NEON_2RM_VPADAL] = 0x7,
3479 [NEON_2RM_VPADAL_U] = 0x7,
3480 [NEON_2RM_VQABS] = 0x7,
3481 [NEON_2RM_VQNEG] = 0x7,
3482 [NEON_2RM_VCGT0] = 0x7,
3483 [NEON_2RM_VCGE0] = 0x7,
3484 [NEON_2RM_VCEQ0] = 0x7,
3485 [NEON_2RM_VCLE0] = 0x7,
3486 [NEON_2RM_VCLT0] = 0x7,
3487 [NEON_2RM_SHA1H] = 0x4,
3488 [NEON_2RM_VABS] = 0x7,
3489 [NEON_2RM_VNEG] = 0x7,
3490 [NEON_2RM_VCGT0_F] = 0x4,
3491 [NEON_2RM_VCGE0_F] = 0x4,
3492 [NEON_2RM_VCEQ0_F] = 0x4,
3493 [NEON_2RM_VCLE0_F] = 0x4,
3494 [NEON_2RM_VCLT0_F] = 0x4,
3495 [NEON_2RM_VABS_F] = 0x4,
3496 [NEON_2RM_VNEG_F] = 0x4,
3497 [NEON_2RM_VSWP] = 0x1,
3498 [NEON_2RM_VTRN] = 0x7,
3499 [NEON_2RM_VUZP] = 0x7,
3500 [NEON_2RM_VZIP] = 0x7,
3501 [NEON_2RM_VMOVN] = 0x7,
3502 [NEON_2RM_VQMOVN] = 0x7,
3503 [NEON_2RM_VSHLL] = 0x7,
3504 [NEON_2RM_SHA1SU1] = 0x4,
3505 [NEON_2RM_VRINTN] = 0x4,
3506 [NEON_2RM_VRINTX] = 0x4,
3507 [NEON_2RM_VRINTA] = 0x4,
3508 [NEON_2RM_VRINTZ] = 0x4,
3509 [NEON_2RM_VCVT_F16_F32] = 0x2,
3510 [NEON_2RM_VRINTM] = 0x4,
3511 [NEON_2RM_VCVT_F32_F16] = 0x2,
3512 [NEON_2RM_VRINTP] = 0x4,
3513 [NEON_2RM_VCVTAU] = 0x4,
3514 [NEON_2RM_VCVTAS] = 0x4,
3515 [NEON_2RM_VCVTNU] = 0x4,
3516 [NEON_2RM_VCVTNS] = 0x4,
3517 [NEON_2RM_VCVTPU] = 0x4,
3518 [NEON_2RM_VCVTPS] = 0x4,
3519 [NEON_2RM_VCVTMU] = 0x4,
3520 [NEON_2RM_VCVTMS] = 0x4,
3521 [NEON_2RM_VRECPE] = 0x4,
3522 [NEON_2RM_VRSQRTE] = 0x4,
3523 [NEON_2RM_VRECPE_F] = 0x4,
3524 [NEON_2RM_VRSQRTE_F] = 0x4,
3525 [NEON_2RM_VCVT_FS] = 0x4,
3526 [NEON_2RM_VCVT_FU] = 0x4,
3527 [NEON_2RM_VCVT_SF] = 0x4,
3528 [NEON_2RM_VCVT_UF] = 0x4,
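/*
 * Expand a three-operand gvec helper that also needs to update the
 * saturation flag: a pointer to vfp.qc is passed as the helper's
 * pointer argument.
 */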
3531 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3532 uint32_t opr_sz, uint32_t max_sz,
3533 gen_helper_gvec_3_ptr *fn)
3535 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3537 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3538 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3539 opr_sz, max_sz, 0, fn);
3540 tcg_temp_free_ptr(qc_ptr);
3543 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3544 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3546 static gen_helper_gvec_3_ptr * const fns[2] = {
3547 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3549 tcg_debug_assert(vece >= 1 && vece <= 2);
3550 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3553 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3554 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3556 static gen_helper_gvec_3_ptr * const fns[2] = {
3557 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3559 tcg_debug_assert(vece >= 1 && vece <= 2);
3560 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
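/*
 * Compare-against-zero expanders: each result element is all ones when the
 * comparison is true and all zeros otherwise, hence the setcond followed by
 * negation on the scalar paths.
 */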
3563 #define GEN_CMP0(NAME, COND) \
3564 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3566 tcg_gen_setcondi_i32(COND, d, a, 0); \
3567 tcg_gen_neg_i32(d, d); \
3569 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3571 tcg_gen_setcondi_i64(COND, d, a, 0); \
3572 tcg_gen_neg_i64(d, d); \
3574 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3576 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3577 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3578 tcg_temp_free_vec(zero); \
3580 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3581 uint32_t opr_sz, uint32_t max_sz) \
3583 const GVecGen2 op[4] = { \
3584 { .fno = gen_helper_gvec_##NAME##0_b, \
3585 .fniv = gen_##NAME##0_vec, \
3586 .opt_opc = vecop_list_cmp, \
3587 .vece = MO_8 }, \
3588 { .fno = gen_helper_gvec_##NAME##0_h, \
3589 .fniv = gen_##NAME##0_vec, \
3590 .opt_opc = vecop_list_cmp, \
3591 .vece = MO_16 }, \
3592 { .fni4 = gen_##NAME##0_i32, \
3593 .fniv = gen_##NAME##0_vec, \
3594 .opt_opc = vecop_list_cmp, \
3595 .vece = MO_32 }, \
3596 { .fni8 = gen_##NAME##0_i64, \
3597 .fniv = gen_##NAME##0_vec, \
3598 .opt_opc = vecop_list_cmp, \
3599 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3600 .vece = MO_64 }, \
3601 }; \
3602 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3605 static const TCGOpcode vecop_list_cmp[] = {
3606 INDEX_op_cmp_vec, 0
3609 GEN_CMP0(ceq, TCG_COND_EQ)
3610 GEN_CMP0(cle, TCG_COND_LE)
3611 GEN_CMP0(cge, TCG_COND_GE)
3612 GEN_CMP0(clt, TCG_COND_LT)
3613 GEN_CMP0(cgt, TCG_COND_GT)
3615 #undef GEN_CMP0
3617 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3619 tcg_gen_vec_sar8i_i64(a, a, shift);
3620 tcg_gen_vec_add8_i64(d, d, a);
3623 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3625 tcg_gen_vec_sar16i_i64(a, a, shift);
3626 tcg_gen_vec_add16_i64(d, d, a);
3629 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3631 tcg_gen_sari_i32(a, a, shift);
3632 tcg_gen_add_i32(d, d, a);
3635 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3637 tcg_gen_sari_i64(a, a, shift);
3638 tcg_gen_add_i64(d, d, a);
3641 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3643 tcg_gen_sari_vec(vece, a, a, sh);
3644 tcg_gen_add_vec(vece, d, d, a);
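/* SSRA: signed shift right by immediate and accumulate, d[i] += a[i] >> sh. */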
3647 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3648 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3650 static const TCGOpcode vecop_list[] = {
3651 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3653 static const GVecGen2i ops[4] = {
3654 { .fni8 = gen_ssra8_i64,
3655 .fniv = gen_ssra_vec,
3656 .fno = gen_helper_gvec_ssra_b,
3657 .load_dest = true,
3658 .opt_opc = vecop_list,
3659 .vece = MO_8 },
3660 { .fni8 = gen_ssra16_i64,
3661 .fniv = gen_ssra_vec,
3662 .fno = gen_helper_gvec_ssra_h,
3663 .load_dest = true,
3664 .opt_opc = vecop_list,
3665 .vece = MO_16 },
3666 { .fni4 = gen_ssra32_i32,
3667 .fniv = gen_ssra_vec,
3668 .fno = gen_helper_gvec_ssra_s,
3669 .load_dest = true,
3670 .opt_opc = vecop_list,
3671 .vece = MO_32 },
3672 { .fni8 = gen_ssra64_i64,
3673 .fniv = gen_ssra_vec,
 3674           .fno = gen_helper_gvec_ssra_d,
3675 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3676 .opt_opc = vecop_list,
3677 .load_dest = true,
3678 .vece = MO_64 },
3681 /* tszimm encoding produces immediates in the range [1..esize]. */
3682 tcg_debug_assert(shift > 0);
3683 tcg_debug_assert(shift <= (8 << vece));
 3685     /*
 3686      * Shifts larger than the element size are architecturally valid.
 3687      * Signed results in all sign bits.
 3688      */
3689 shift = MIN(shift, (8 << vece) - 1);
3690 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3693 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3695 tcg_gen_vec_shr8i_i64(a, a, shift);
3696 tcg_gen_vec_add8_i64(d, d, a);
3699 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3701 tcg_gen_vec_shr16i_i64(a, a, shift);
3702 tcg_gen_vec_add16_i64(d, d, a);
3705 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3707 tcg_gen_shri_i32(a, a, shift);
3708 tcg_gen_add_i32(d, d, a);
3711 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3713 tcg_gen_shri_i64(a, a, shift);
3714 tcg_gen_add_i64(d, d, a);
3717 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3719 tcg_gen_shri_vec(vece, a, a, sh);
3720 tcg_gen_add_vec(vece, d, d, a);
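/* USRA: unsigned (logical) shift right by immediate and accumulate. */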
3723 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3724 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3726 static const TCGOpcode vecop_list[] = {
3727 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3729 static const GVecGen2i ops[4] = {
3730 { .fni8 = gen_usra8_i64,
3731 .fniv = gen_usra_vec,
3732 .fno = gen_helper_gvec_usra_b,
3733 .load_dest = true,
3734 .opt_opc = vecop_list,
3735 .vece = MO_8, },
3736 { .fni8 = gen_usra16_i64,
3737 .fniv = gen_usra_vec,
3738 .fno = gen_helper_gvec_usra_h,
3739 .load_dest = true,
3740 .opt_opc = vecop_list,
3741 .vece = MO_16, },
3742 { .fni4 = gen_usra32_i32,
3743 .fniv = gen_usra_vec,
3744 .fno = gen_helper_gvec_usra_s,
3745 .load_dest = true,
3746 .opt_opc = vecop_list,
3747 .vece = MO_32, },
3748 { .fni8 = gen_usra64_i64,
3749 .fniv = gen_usra_vec,
3750 .fno = gen_helper_gvec_usra_d,
3751 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3752 .load_dest = true,
3753 .opt_opc = vecop_list,
3754 .vece = MO_64, },
3757 /* tszimm encoding produces immediates in the range [1..esize]. */
3758 tcg_debug_assert(shift > 0);
3759 tcg_debug_assert(shift <= (8 << vece));
 3761     /*
 3762      * Shifts larger than the element size are architecturally valid.
 3763      * Unsigned results in all zeros as input to accumulate: nop.
 3764      */
3765 if (shift < (8 << vece)) {
3766 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3767 } else {
3768 /* Nop, but we do need to clear the tail. */
3769 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
 3773 /*
 3774  * Shift one less than the requested amount, and the low bit is
 3775  * the rounding bit. For the 8 and 16-bit operations, because we
 3776  * mask the low bit, we can perform a normal integer shift instead
 3777  * of a vector shift.
 3778  */
3779 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3781 TCGv_i64 t = tcg_temp_new_i64();
3783 tcg_gen_shri_i64(t, a, sh - 1);
3784 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3785 tcg_gen_vec_sar8i_i64(d, a, sh);
3786 tcg_gen_vec_add8_i64(d, d, t);
3787 tcg_temp_free_i64(t);
3790 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3792 TCGv_i64 t = tcg_temp_new_i64();
3794 tcg_gen_shri_i64(t, a, sh - 1);
3795 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3796 tcg_gen_vec_sar16i_i64(d, a, sh);
3797 tcg_gen_vec_add16_i64(d, d, t);
3798 tcg_temp_free_i64(t);
3801 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3803 TCGv_i32 t = tcg_temp_new_i32();
3805 tcg_gen_extract_i32(t, a, sh - 1, 1);
3806 tcg_gen_sari_i32(d, a, sh);
3807 tcg_gen_add_i32(d, d, t);
3808 tcg_temp_free_i32(t);
3811 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3813 TCGv_i64 t = tcg_temp_new_i64();
3815 tcg_gen_extract_i64(t, a, sh - 1, 1);
3816 tcg_gen_sari_i64(d, a, sh);
3817 tcg_gen_add_i64(d, d, t);
3818 tcg_temp_free_i64(t);
3821 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3823 TCGv_vec t = tcg_temp_new_vec_matching(d);
3824 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3826 tcg_gen_shri_vec(vece, t, a, sh - 1);
3827 tcg_gen_dupi_vec(vece, ones, 1);
3828 tcg_gen_and_vec(vece, t, t, ones);
3829 tcg_gen_sari_vec(vece, d, a, sh);
3830 tcg_gen_add_vec(vece, d, d, t);
3832 tcg_temp_free_vec(t);
3833 tcg_temp_free_vec(ones);
3836 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3837 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3839 static const TCGOpcode vecop_list[] = {
3840 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3842 static const GVecGen2i ops[4] = {
3843 { .fni8 = gen_srshr8_i64,
3844 .fniv = gen_srshr_vec,
3845 .fno = gen_helper_gvec_srshr_b,
3846 .opt_opc = vecop_list,
3847 .vece = MO_8 },
3848 { .fni8 = gen_srshr16_i64,
3849 .fniv = gen_srshr_vec,
3850 .fno = gen_helper_gvec_srshr_h,
3851 .opt_opc = vecop_list,
3852 .vece = MO_16 },
3853 { .fni4 = gen_srshr32_i32,
3854 .fniv = gen_srshr_vec,
3855 .fno = gen_helper_gvec_srshr_s,
3856 .opt_opc = vecop_list,
3857 .vece = MO_32 },
3858 { .fni8 = gen_srshr64_i64,
3859 .fniv = gen_srshr_vec,
3860 .fno = gen_helper_gvec_srshr_d,
3861 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3862 .opt_opc = vecop_list,
3863 .vece = MO_64 },
3866 /* tszimm encoding produces immediates in the range [1..esize] */
3867 tcg_debug_assert(shift > 0);
3868 tcg_debug_assert(shift <= (8 << vece));
3870 if (shift == (8 << vece)) {
 3871         /*
 3872          * Shifts larger than the element size are architecturally valid.
 3873          * Signed results in all sign bits. With rounding, this produces
 3874          * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
 3875          * I.e. always zero.
 3876          */
3877 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3878 } else {
3879 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
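/*
 * SRSRA: rounding signed shift right and accumulate, built from the
 * gen_srshr* helpers above plus an add into the destination.
 */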
3883 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3885 TCGv_i64 t = tcg_temp_new_i64();
3887 gen_srshr8_i64(t, a, sh);
3888 tcg_gen_vec_add8_i64(d, d, t);
3889 tcg_temp_free_i64(t);
3892 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3894 TCGv_i64 t = tcg_temp_new_i64();
3896 gen_srshr16_i64(t, a, sh);
3897 tcg_gen_vec_add16_i64(d, d, t);
3898 tcg_temp_free_i64(t);
3901 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3903 TCGv_i32 t = tcg_temp_new_i32();
3905 gen_srshr32_i32(t, a, sh);
3906 tcg_gen_add_i32(d, d, t);
3907 tcg_temp_free_i32(t);
3910 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3912 TCGv_i64 t = tcg_temp_new_i64();
3914 gen_srshr64_i64(t, a, sh);
3915 tcg_gen_add_i64(d, d, t);
3916 tcg_temp_free_i64(t);
3919 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3921 TCGv_vec t = tcg_temp_new_vec_matching(d);
3923 gen_srshr_vec(vece, t, a, sh);
3924 tcg_gen_add_vec(vece, d, d, t);
3925 tcg_temp_free_vec(t);
3928 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3929 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3931 static const TCGOpcode vecop_list[] = {
3932 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3934 static const GVecGen2i ops[4] = {
3935 { .fni8 = gen_srsra8_i64,
3936 .fniv = gen_srsra_vec,
3937 .fno = gen_helper_gvec_srsra_b,
3938 .opt_opc = vecop_list,
3939 .load_dest = true,
3940 .vece = MO_8 },
3941 { .fni8 = gen_srsra16_i64,
3942 .fniv = gen_srsra_vec,
3943 .fno = gen_helper_gvec_srsra_h,
3944 .opt_opc = vecop_list,
3945 .load_dest = true,
3946 .vece = MO_16 },
3947 { .fni4 = gen_srsra32_i32,
3948 .fniv = gen_srsra_vec,
3949 .fno = gen_helper_gvec_srsra_s,
3950 .opt_opc = vecop_list,
3951 .load_dest = true,
3952 .vece = MO_32 },
3953 { .fni8 = gen_srsra64_i64,
3954 .fniv = gen_srsra_vec,
3955 .fno = gen_helper_gvec_srsra_d,
3956 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3957 .opt_opc = vecop_list,
3958 .load_dest = true,
3959 .vece = MO_64 },
3962 /* tszimm encoding produces immediates in the range [1..esize] */
3963 tcg_debug_assert(shift > 0);
3964 tcg_debug_assert(shift <= (8 << vece));
 3966     /*
 3967      * Shifts larger than the element size are architecturally valid.
 3968      * Signed results in all sign bits. With rounding, this produces
 3969      * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
 3970      * I.e. always zero. With accumulation, this leaves D unchanged.
 3971      */
3972 if (shift == (8 << vece)) {
3973 /* Nop, but we do need to clear the tail. */
3974 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3975 } else {
3976 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3980 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3982 TCGv_i64 t = tcg_temp_new_i64();
3984 tcg_gen_shri_i64(t, a, sh - 1);
3985 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3986 tcg_gen_vec_shr8i_i64(d, a, sh);
3987 tcg_gen_vec_add8_i64(d, d, t);
3988 tcg_temp_free_i64(t);
3991 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3993 TCGv_i64 t = tcg_temp_new_i64();
3995 tcg_gen_shri_i64(t, a, sh - 1);
3996 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3997 tcg_gen_vec_shr16i_i64(d, a, sh);
3998 tcg_gen_vec_add16_i64(d, d, t);
3999 tcg_temp_free_i64(t);
4002 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4004 TCGv_i32 t = tcg_temp_new_i32();
4006 tcg_gen_extract_i32(t, a, sh - 1, 1);
4007 tcg_gen_shri_i32(d, a, sh);
4008 tcg_gen_add_i32(d, d, t);
4009 tcg_temp_free_i32(t);
4012 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4014 TCGv_i64 t = tcg_temp_new_i64();
4016 tcg_gen_extract_i64(t, a, sh - 1, 1);
4017 tcg_gen_shri_i64(d, a, sh);
4018 tcg_gen_add_i64(d, d, t);
4019 tcg_temp_free_i64(t);
4022 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4024 TCGv_vec t = tcg_temp_new_vec_matching(d);
4025 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4027 tcg_gen_shri_vec(vece, t, a, shift - 1);
4028 tcg_gen_dupi_vec(vece, ones, 1);
4029 tcg_gen_and_vec(vece, t, t, ones);
4030 tcg_gen_shri_vec(vece, d, a, shift);
4031 tcg_gen_add_vec(vece, d, d, t);
4033 tcg_temp_free_vec(t);
4034 tcg_temp_free_vec(ones);
4037 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4038 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4040 static const TCGOpcode vecop_list[] = {
4041 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4043 static const GVecGen2i ops[4] = {
4044 { .fni8 = gen_urshr8_i64,
4045 .fniv = gen_urshr_vec,
4046 .fno = gen_helper_gvec_urshr_b,
4047 .opt_opc = vecop_list,
4048 .vece = MO_8 },
4049 { .fni8 = gen_urshr16_i64,
4050 .fniv = gen_urshr_vec,
4051 .fno = gen_helper_gvec_urshr_h,
4052 .opt_opc = vecop_list,
4053 .vece = MO_16 },
4054 { .fni4 = gen_urshr32_i32,
4055 .fniv = gen_urshr_vec,
4056 .fno = gen_helper_gvec_urshr_s,
4057 .opt_opc = vecop_list,
4058 .vece = MO_32 },
4059 { .fni8 = gen_urshr64_i64,
4060 .fniv = gen_urshr_vec,
4061 .fno = gen_helper_gvec_urshr_d,
4062 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4063 .opt_opc = vecop_list,
4064 .vece = MO_64 },
4067 /* tszimm encoding produces immediates in the range [1..esize] */
4068 tcg_debug_assert(shift > 0);
4069 tcg_debug_assert(shift <= (8 << vece));
4071 if (shift == (8 << vece)) {
4073 * Shifts larger than the element size are architecturally valid.
4074 * Unsigned results in zero. With rounding, this produces a
4075 * copy of the most significant bit.
4077 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4078 } else {
4079 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4083 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4085 TCGv_i64 t = tcg_temp_new_i64();
4087 if (sh == 8) {
4088 tcg_gen_vec_shr8i_i64(t, a, 7);
4089 } else {
4090 gen_urshr8_i64(t, a, sh);
4092 tcg_gen_vec_add8_i64(d, d, t);
4093 tcg_temp_free_i64(t);
4096 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4098 TCGv_i64 t = tcg_temp_new_i64();
4100 if (sh == 16) {
4101 tcg_gen_vec_shr16i_i64(t, a, 15);
4102 } else {
4103 gen_urshr16_i64(t, a, sh);
4105 tcg_gen_vec_add16_i64(d, d, t);
4106 tcg_temp_free_i64(t);
4109 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4111 TCGv_i32 t = tcg_temp_new_i32();
4113 if (sh == 32) {
4114 tcg_gen_shri_i32(t, a, 31);
4115 } else {
4116 gen_urshr32_i32(t, a, sh);
4118 tcg_gen_add_i32(d, d, t);
4119 tcg_temp_free_i32(t);
4122 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4124 TCGv_i64 t = tcg_temp_new_i64();
4126 if (sh == 64) {
4127 tcg_gen_shri_i64(t, a, 63);
4128 } else {
4129 gen_urshr64_i64(t, a, sh);
4131 tcg_gen_add_i64(d, d, t);
4132 tcg_temp_free_i64(t);
4135 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4137 TCGv_vec t = tcg_temp_new_vec_matching(d);
4139 if (sh == (8 << vece)) {
4140 tcg_gen_shri_vec(vece, t, a, sh - 1);
4141 } else {
4142 gen_urshr_vec(vece, t, a, sh);
4144 tcg_gen_add_vec(vece, d, d, t);
4145 tcg_temp_free_vec(t);
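/*
 * For URSRA a shift of esize is not a no-op: with rounding the shifted
 * value is (a + (1 << (esize - 1))) >> esize, which is simply the most
 * significant bit of a, hence the "shift right by esize - 1" special
 * case in the helpers above.
 */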
4148 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4149 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4151 static const TCGOpcode vecop_list[] = {
4152 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4154 static const GVecGen2i ops[4] = {
4155 { .fni8 = gen_ursra8_i64,
4156 .fniv = gen_ursra_vec,
4157 .fno = gen_helper_gvec_ursra_b,
4158 .opt_opc = vecop_list,
4159 .load_dest = true,
4160 .vece = MO_8 },
4161 { .fni8 = gen_ursra16_i64,
4162 .fniv = gen_ursra_vec,
4163 .fno = gen_helper_gvec_ursra_h,
4164 .opt_opc = vecop_list,
4165 .load_dest = true,
4166 .vece = MO_16 },
4167 { .fni4 = gen_ursra32_i32,
4168 .fniv = gen_ursra_vec,
4169 .fno = gen_helper_gvec_ursra_s,
4170 .opt_opc = vecop_list,
4171 .load_dest = true,
4172 .vece = MO_32 },
4173 { .fni8 = gen_ursra64_i64,
4174 .fniv = gen_ursra_vec,
4175 .fno = gen_helper_gvec_ursra_d,
4176 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4177 .opt_opc = vecop_list,
4178 .load_dest = true,
4179 .vece = MO_64 },
4182 /* tszimm encoding produces immediates in the range [1..esize] */
4183 tcg_debug_assert(shift > 0);
4184 tcg_debug_assert(shift <= (8 << vece));
4186 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
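/*
 * The gen_shr*_ins_* helpers below implement SRI (shift right and
 * insert): each destination element keeps its top 'shift' bits and
 * takes the remaining esize - shift bits from the shifted source,
 *     d = (d & ~mask) | ((a >> shift) & mask),  mask = 2^(esize - shift) - 1
 * e.g. with esize == 8 and shift == 3, mask is 0x1f in every byte, so
 * the top three bits of each destination byte are preserved.
 */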
4189 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4191 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4192 TCGv_i64 t = tcg_temp_new_i64();
4194 tcg_gen_shri_i64(t, a, shift);
4195 tcg_gen_andi_i64(t, t, mask);
4196 tcg_gen_andi_i64(d, d, ~mask);
4197 tcg_gen_or_i64(d, d, t);
4198 tcg_temp_free_i64(t);
4201 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4203 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4204 TCGv_i64 t = tcg_temp_new_i64();
4206 tcg_gen_shri_i64(t, a, shift);
4207 tcg_gen_andi_i64(t, t, mask);
4208 tcg_gen_andi_i64(d, d, ~mask);
4209 tcg_gen_or_i64(d, d, t);
4210 tcg_temp_free_i64(t);
4213 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4215 tcg_gen_shri_i32(a, a, shift);
4216 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4219 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4221 tcg_gen_shri_i64(a, a, shift);
4222 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4225 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4227 TCGv_vec t = tcg_temp_new_vec_matching(d);
4228 TCGv_vec m = tcg_temp_new_vec_matching(d);
4230 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4231 tcg_gen_shri_vec(vece, t, a, sh);
4232 tcg_gen_and_vec(vece, d, d, m);
4233 tcg_gen_or_vec(vece, d, d, t);
4235 tcg_temp_free_vec(t);
4236 tcg_temp_free_vec(m);
4239 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4240 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4242 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4243 const GVecGen2i ops[4] = {
4244 { .fni8 = gen_shr8_ins_i64,
4245 .fniv = gen_shr_ins_vec,
4246 .fno = gen_helper_gvec_sri_b,
4247 .load_dest = true,
4248 .opt_opc = vecop_list,
4249 .vece = MO_8 },
4250 { .fni8 = gen_shr16_ins_i64,
4251 .fniv = gen_shr_ins_vec,
4252 .fno = gen_helper_gvec_sri_h,
4253 .load_dest = true,
4254 .opt_opc = vecop_list,
4255 .vece = MO_16 },
4256 { .fni4 = gen_shr32_ins_i32,
4257 .fniv = gen_shr_ins_vec,
4258 .fno = gen_helper_gvec_sri_s,
4259 .load_dest = true,
4260 .opt_opc = vecop_list,
4261 .vece = MO_32 },
4262 { .fni8 = gen_shr64_ins_i64,
4263 .fniv = gen_shr_ins_vec,
4264 .fno = gen_helper_gvec_sri_d,
4265 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4266 .load_dest = true,
4267 .opt_opc = vecop_list,
4268 .vece = MO_64 },
4271 /* tszimm encoding produces immediates in the range [1..esize]. */
4272 tcg_debug_assert(shift > 0);
4273 tcg_debug_assert(shift <= (8 << vece));
4275 /* Shift of esize leaves destination unchanged. */
4276 if (shift < (8 << vece)) {
4277 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4278 } else {
4279 /* Nop, but we do need to clear the tail. */
4280 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4284 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4286 uint64_t mask = dup_const(MO_8, 0xff << shift);
4287 TCGv_i64 t = tcg_temp_new_i64();
4289 tcg_gen_shli_i64(t, a, shift);
4290 tcg_gen_andi_i64(t, t, mask);
4291 tcg_gen_andi_i64(d, d, ~mask);
4292 tcg_gen_or_i64(d, d, t);
4293 tcg_temp_free_i64(t);
4296 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4298 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4299 TCGv_i64 t = tcg_temp_new_i64();
4301 tcg_gen_shli_i64(t, a, shift);
4302 tcg_gen_andi_i64(t, t, mask);
4303 tcg_gen_andi_i64(d, d, ~mask);
4304 tcg_gen_or_i64(d, d, t);
4305 tcg_temp_free_i64(t);
4308 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4310 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4313 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4315 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
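/*
 * For 32-bit and 64-bit elements SLI is a straight deposit: insert the
 * low esize - shift bits of a into d at bit position 'shift', keeping
 * d's low 'shift' bits.  The byte and halfword variants above build the
 * same result with an explicit mask (element ones << shift), as does
 * the vector variant below.
 */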
4318 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4320 TCGv_vec t = tcg_temp_new_vec_matching(d);
4321 TCGv_vec m = tcg_temp_new_vec_matching(d);
4323 tcg_gen_shli_vec(vece, t, a, sh);
4324 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4325 tcg_gen_and_vec(vece, d, d, m);
4326 tcg_gen_or_vec(vece, d, d, t);
4328 tcg_temp_free_vec(t);
4329 tcg_temp_free_vec(m);
4332 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4333 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4335 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4336 const GVecGen2i ops[4] = {
4337 { .fni8 = gen_shl8_ins_i64,
4338 .fniv = gen_shl_ins_vec,
4339 .fno = gen_helper_gvec_sli_b,
4340 .load_dest = true,
4341 .opt_opc = vecop_list,
4342 .vece = MO_8 },
4343 { .fni8 = gen_shl16_ins_i64,
4344 .fniv = gen_shl_ins_vec,
4345 .fno = gen_helper_gvec_sli_h,
4346 .load_dest = true,
4347 .opt_opc = vecop_list,
4348 .vece = MO_16 },
4349 { .fni4 = gen_shl32_ins_i32,
4350 .fniv = gen_shl_ins_vec,
4351 .fno = gen_helper_gvec_sli_s,
4352 .load_dest = true,
4353 .opt_opc = vecop_list,
4354 .vece = MO_32 },
4355 { .fni8 = gen_shl64_ins_i64,
4356 .fniv = gen_shl_ins_vec,
4357 .fno = gen_helper_gvec_sli_d,
4358 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4359 .load_dest = true,
4360 .opt_opc = vecop_list,
4361 .vece = MO_64 },
4364 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4365 tcg_debug_assert(shift >= 0);
4366 tcg_debug_assert(shift < (8 << vece));
4368 if (shift == 0) {
4369 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4370 } else {
4371 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4375 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4377 gen_helper_neon_mul_u8(a, a, b);
4378 gen_helper_neon_add_u8(d, d, a);
4381 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4383 gen_helper_neon_mul_u8(a, a, b);
4384 gen_helper_neon_sub_u8(d, d, a);
4387 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4389 gen_helper_neon_mul_u16(a, a, b);
4390 gen_helper_neon_add_u16(d, d, a);
4393 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4395 gen_helper_neon_mul_u16(a, a, b);
4396 gen_helper_neon_sub_u16(d, d, a);
4399 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4401 tcg_gen_mul_i32(a, a, b);
4402 tcg_gen_add_i32(d, d, a);
4405 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4407 tcg_gen_mul_i32(a, a, b);
4408 tcg_gen_sub_i32(d, d, a);
4411 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4413 tcg_gen_mul_i64(a, a, b);
4414 tcg_gen_add_i64(d, d, a);
4417 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4419 tcg_gen_mul_i64(a, a, b);
4420 tcg_gen_sub_i64(d, d, a);
4423 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4425 tcg_gen_mul_vec(vece, a, a, b);
4426 tcg_gen_add_vec(vece, d, d, a);
4429 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4431 tcg_gen_mul_vec(vece, a, a, b);
4432 tcg_gen_sub_vec(vece, d, d, a);
4435 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4436 * these tables are shared with AArch64 which does support them.
4438 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4439 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4441 static const TCGOpcode vecop_list[] = {
4442 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4444 static const GVecGen3 ops[4] = {
4445 { .fni4 = gen_mla8_i32,
4446 .fniv = gen_mla_vec,
4447 .load_dest = true,
4448 .opt_opc = vecop_list,
4449 .vece = MO_8 },
4450 { .fni4 = gen_mla16_i32,
4451 .fniv = gen_mla_vec,
4452 .load_dest = true,
4453 .opt_opc = vecop_list,
4454 .vece = MO_16 },
4455 { .fni4 = gen_mla32_i32,
4456 .fniv = gen_mla_vec,
4457 .load_dest = true,
4458 .opt_opc = vecop_list,
4459 .vece = MO_32 },
4460 { .fni8 = gen_mla64_i64,
4461 .fniv = gen_mla_vec,
4462 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4463 .load_dest = true,
4464 .opt_opc = vecop_list,
4465 .vece = MO_64 },
4467 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4470 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4471 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4473 static const TCGOpcode vecop_list[] = {
4474 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4476 static const GVecGen3 ops[4] = {
4477 { .fni4 = gen_mls8_i32,
4478 .fniv = gen_mls_vec,
4479 .load_dest = true,
4480 .opt_opc = vecop_list,
4481 .vece = MO_8 },
4482 { .fni4 = gen_mls16_i32,
4483 .fniv = gen_mls_vec,
4484 .load_dest = true,
4485 .opt_opc = vecop_list,
4486 .vece = MO_16 },
4487 { .fni4 = gen_mls32_i32,
4488 .fniv = gen_mls_vec,
4489 .load_dest = true,
4490 .opt_opc = vecop_list,
4491 .vece = MO_32 },
4492 { .fni8 = gen_mls64_i64,
4493 .fniv = gen_mls_vec,
4494 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4495 .load_dest = true,
4496 .opt_opc = vecop_list,
4497 .vece = MO_64 },
4499 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4502 /* CMTST : test is "if ((X & Y) != 0)". */
4503 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4505 tcg_gen_and_i32(d, a, b);
4506 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4507 tcg_gen_neg_i32(d, d);
4510 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4512 tcg_gen_and_i64(d, a, b);
4513 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4514 tcg_gen_neg_i64(d, d);
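/*
 * i.e. CMTST sets an element to all ones when the two inputs share at
 * least one set bit, and to zero otherwise.  The integer versions above
 * obtain the all-ones pattern by computing setcond (0 or 1) and then
 * negating it; the vector version below lets cmp_vec produce the -1/0
 * mask directly.
 */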
4517 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4519 tcg_gen_and_vec(vece, d, a, b);
4520 tcg_gen_dupi_vec(vece, a, 0);
4521 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4524 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4525 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4527 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4528 static const GVecGen3 ops[4] = {
4529 { .fni4 = gen_helper_neon_tst_u8,
4530 .fniv = gen_cmtst_vec,
4531 .opt_opc = vecop_list,
4532 .vece = MO_8 },
4533 { .fni4 = gen_helper_neon_tst_u16,
4534 .fniv = gen_cmtst_vec,
4535 .opt_opc = vecop_list,
4536 .vece = MO_16 },
4537 { .fni4 = gen_cmtst_i32,
4538 .fniv = gen_cmtst_vec,
4539 .opt_opc = vecop_list,
4540 .vece = MO_32 },
4541 { .fni8 = gen_cmtst_i64,
4542 .fniv = gen_cmtst_vec,
4543 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4544 .opt_opc = vecop_list,
4545 .vece = MO_64 },
4547 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4550 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4552 TCGv_i32 lval = tcg_temp_new_i32();
4553 TCGv_i32 rval = tcg_temp_new_i32();
4554 TCGv_i32 lsh = tcg_temp_new_i32();
4555 TCGv_i32 rsh = tcg_temp_new_i32();
4556 TCGv_i32 zero = tcg_const_i32(0);
4557 TCGv_i32 max = tcg_const_i32(32);
4560 * Rely on the TCG guarantee that out of range shifts produce
4561 * unspecified results, not undefined behaviour (i.e. no trap).
4562 * Discard out-of-range results after the fact.
4564 tcg_gen_ext8s_i32(lsh, shift);
4565 tcg_gen_neg_i32(rsh, lsh);
4566 tcg_gen_shl_i32(lval, src, lsh);
4567 tcg_gen_shr_i32(rval, src, rsh);
4568 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4569 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4571 tcg_temp_free_i32(lval);
4572 tcg_temp_free_i32(rval);
4573 tcg_temp_free_i32(lsh);
4574 tcg_temp_free_i32(rsh);
4575 tcg_temp_free_i32(zero);
4576 tcg_temp_free_i32(max);
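/*
 * Scalar reference for USHL (illustrative only, esize == 32): the shift
 * count is the signed low byte of the shift operand; positive counts
 * shift left, negative counts shift right, and any count whose
 * magnitude is >= esize yields zero:
 *
 *     int8_t sh = (int8_t)shift;
 *     dst = (sh <= -32 || sh >= 32) ? 0
 *         : (sh >= 0) ? src << sh : src >> -sh;
 *
 * The movcond sequence above computes the same result branchlessly:
 * both shifted candidates are produced and the out-of-range ones are
 * discarded afterwards.
 */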
4579 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4581 TCGv_i64 lval = tcg_temp_new_i64();
4582 TCGv_i64 rval = tcg_temp_new_i64();
4583 TCGv_i64 lsh = tcg_temp_new_i64();
4584 TCGv_i64 rsh = tcg_temp_new_i64();
4585 TCGv_i64 zero = tcg_const_i64(0);
4586 TCGv_i64 max = tcg_const_i64(64);
4589 * Rely on the TCG guarantee that out of range shifts produce
4590 * unspecified results, not undefined behaviour (i.e. no trap).
4591 * Discard out-of-range results after the fact.
4593 tcg_gen_ext8s_i64(lsh, shift);
4594 tcg_gen_neg_i64(rsh, lsh);
4595 tcg_gen_shl_i64(lval, src, lsh);
4596 tcg_gen_shr_i64(rval, src, rsh);
4597 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4598 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4600 tcg_temp_free_i64(lval);
4601 tcg_temp_free_i64(rval);
4602 tcg_temp_free_i64(lsh);
4603 tcg_temp_free_i64(rsh);
4604 tcg_temp_free_i64(zero);
4605 tcg_temp_free_i64(max);
4608 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4609 TCGv_vec src, TCGv_vec shift)
4611 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4612 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4613 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4614 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4615 TCGv_vec msk, max;
4617 tcg_gen_neg_vec(vece, rsh, shift);
4618 if (vece == MO_8) {
4619 tcg_gen_mov_vec(lsh, shift);
4620 } else {
4621 msk = tcg_temp_new_vec_matching(dst);
4622 tcg_gen_dupi_vec(vece, msk, 0xff);
4623 tcg_gen_and_vec(vece, lsh, shift, msk);
4624 tcg_gen_and_vec(vece, rsh, rsh, msk);
4625 tcg_temp_free_vec(msk);
4629 * Rely on the TCG guarantee that out of range shifts produce
4630 * unspecified results, not undefined behaviour (i.e. no trap).
4631 * Discard out-of-range results after the fact.
4633 tcg_gen_shlv_vec(vece, lval, src, lsh);
4634 tcg_gen_shrv_vec(vece, rval, src, rsh);
4636 max = tcg_temp_new_vec_matching(dst);
4637 tcg_gen_dupi_vec(vece, max, 8 << vece);
4640 * The choice of LT (signed) and GEU (unsigned) is biased toward
4641 * the instructions of the x86_64 host. For MO_8, the whole byte
4642 * is significant so we must use an unsigned compare; otherwise we
4643 * have already masked to a byte and so a signed compare works.
4644 * Other tcg hosts have a full set of comparisons and do not care.
4646 if (vece == MO_8) {
4647 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4648 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4649 tcg_gen_andc_vec(vece, lval, lval, lsh);
4650 tcg_gen_andc_vec(vece, rval, rval, rsh);
4651 } else {
4652 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4653 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4654 tcg_gen_and_vec(vece, lval, lval, lsh);
4655 tcg_gen_and_vec(vece, rval, rval, rsh);
4657 tcg_gen_or_vec(vece, dst, lval, rval);
4659 tcg_temp_free_vec(max);
4660 tcg_temp_free_vec(lval);
4661 tcg_temp_free_vec(rval);
4662 tcg_temp_free_vec(lsh);
4663 tcg_temp_free_vec(rsh);
4666 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4667 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4669 static const TCGOpcode vecop_list[] = {
4670 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4671 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4673 static const GVecGen3 ops[4] = {
4674 { .fniv = gen_ushl_vec,
4675 .fno = gen_helper_gvec_ushl_b,
4676 .opt_opc = vecop_list,
4677 .vece = MO_8 },
4678 { .fniv = gen_ushl_vec,
4679 .fno = gen_helper_gvec_ushl_h,
4680 .opt_opc = vecop_list,
4681 .vece = MO_16 },
4682 { .fni4 = gen_ushl_i32,
4683 .fniv = gen_ushl_vec,
4684 .opt_opc = vecop_list,
4685 .vece = MO_32 },
4686 { .fni8 = gen_ushl_i64,
4687 .fniv = gen_ushl_vec,
4688 .opt_opc = vecop_list,
4689 .vece = MO_64 },
4691 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4694 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4696 TCGv_i32 lval = tcg_temp_new_i32();
4697 TCGv_i32 rval = tcg_temp_new_i32();
4698 TCGv_i32 lsh = tcg_temp_new_i32();
4699 TCGv_i32 rsh = tcg_temp_new_i32();
4700 TCGv_i32 zero = tcg_const_i32(0);
4701 TCGv_i32 max = tcg_const_i32(31);
4704 * Rely on the TCG guarantee that out of range shifts produce
4705 * unspecified results, not undefined behaviour (i.e. no trap).
4706 * Discard out-of-range results after the fact.
4708 tcg_gen_ext8s_i32(lsh, shift);
4709 tcg_gen_neg_i32(rsh, lsh);
4710 tcg_gen_shl_i32(lval, src, lsh);
4711 tcg_gen_umin_i32(rsh, rsh, max);
4712 tcg_gen_sar_i32(rval, src, rsh);
4713 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4714 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4716 tcg_temp_free_i32(lval);
4717 tcg_temp_free_i32(rval);
4718 tcg_temp_free_i32(lsh);
4719 tcg_temp_free_i32(rsh);
4720 tcg_temp_free_i32(zero);
4721 tcg_temp_free_i32(max);
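/*
 * For SSHL the out-of-range cases differ from USHL: an arithmetic right
 * shift by esize or more gives the same result as a shift by esize - 1
 * (all copies of the sign bit), so rsh is clamped with umin rather than
 * discarded; only an out-of-range left shift is forced to zero.
 */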
4724 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4726 TCGv_i64 lval = tcg_temp_new_i64();
4727 TCGv_i64 rval = tcg_temp_new_i64();
4728 TCGv_i64 lsh = tcg_temp_new_i64();
4729 TCGv_i64 rsh = tcg_temp_new_i64();
4730 TCGv_i64 zero = tcg_const_i64(0);
4731 TCGv_i64 max = tcg_const_i64(63);
4734 * Rely on the TCG guarantee that out of range shifts produce
4735 * unspecified results, not undefined behaviour (i.e. no trap).
4736 * Discard out-of-range results after the fact.
4738 tcg_gen_ext8s_i64(lsh, shift);
4739 tcg_gen_neg_i64(rsh, lsh);
4740 tcg_gen_shl_i64(lval, src, lsh);
4741 tcg_gen_umin_i64(rsh, rsh, max);
4742 tcg_gen_sar_i64(rval, src, rsh);
4743 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4744 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4746 tcg_temp_free_i64(lval);
4747 tcg_temp_free_i64(rval);
4748 tcg_temp_free_i64(lsh);
4749 tcg_temp_free_i64(rsh);
4750 tcg_temp_free_i64(zero);
4751 tcg_temp_free_i64(max);
4754 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4755 TCGv_vec src, TCGv_vec shift)
4757 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4758 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4759 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4760 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4761 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4764 * Rely on the TCG guarantee that out of range shifts produce
4765 * unspecified results, not undefined behaviour (i.e. no trap).
4766 * Discard out-of-range results after the fact.
4768 tcg_gen_neg_vec(vece, rsh, shift);
4769 if (vece == MO_8) {
4770 tcg_gen_mov_vec(lsh, shift);
4771 } else {
4772 tcg_gen_dupi_vec(vece, tmp, 0xff);
4773 tcg_gen_and_vec(vece, lsh, shift, tmp);
4774 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4777 /* Bound rsh so that an out-of-range right shift produces all sign bits. */
4778 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4779 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4780 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4782 tcg_gen_shlv_vec(vece, lval, src, lsh);
4783 tcg_gen_sarv_vec(vece, rval, src, rsh);
4785 /* Select in-bound left shift. */
4786 tcg_gen_andc_vec(vece, lval, lval, tmp);
4788 /* Select between left and right shift. */
4789 if (vece == MO_8) {
4790 tcg_gen_dupi_vec(vece, tmp, 0);
4791 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4792 } else {
4793 tcg_gen_dupi_vec(vece, tmp, 0x80);
4794 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4797 tcg_temp_free_vec(lval);
4798 tcg_temp_free_vec(rval);
4799 tcg_temp_free_vec(lsh);
4800 tcg_temp_free_vec(rsh);
4801 tcg_temp_free_vec(tmp);
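/*
 * The final cmpsel selects between the left-shift and right-shift
 * results according to the sign of the shift count.  For MO_8 the count
 * occupies the whole element, so it is compared against zero; for wider
 * elements the count has already been masked to the low byte, so
 * "negative" means bit 7 set, hence the comparison against 0x80 with
 * the two result operands in the opposite order.
 */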
4804 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4805 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4807 static const TCGOpcode vecop_list[] = {
4808 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4809 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4811 static const GVecGen3 ops[4] = {
4812 { .fniv = gen_sshl_vec,
4813 .fno = gen_helper_gvec_sshl_b,
4814 .opt_opc = vecop_list,
4815 .vece = MO_8 },
4816 { .fniv = gen_sshl_vec,
4817 .fno = gen_helper_gvec_sshl_h,
4818 .opt_opc = vecop_list,
4819 .vece = MO_16 },
4820 { .fni4 = gen_sshl_i32,
4821 .fniv = gen_sshl_vec,
4822 .opt_opc = vecop_list,
4823 .vece = MO_32 },
4824 { .fni8 = gen_sshl_i64,
4825 .fniv = gen_sshl_vec,
4826 .opt_opc = vecop_list,
4827 .vece = MO_64 },
4829 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4832 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4833 TCGv_vec a, TCGv_vec b)
4835 TCGv_vec x = tcg_temp_new_vec_matching(t);
4836 tcg_gen_add_vec(vece, x, a, b);
4837 tcg_gen_usadd_vec(vece, t, a, b);
4838 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4839 tcg_gen_or_vec(vece, sat, sat, x);
4840 tcg_temp_free_vec(x);
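/*
 * Saturation is detected by computing the operation twice: a plain
 * wrapping add and a saturating add.  Any lane where the two results
 * differ has saturated, the NE comparison produces an all-ones mask for
 * such lanes, and that mask is ORed into 'sat', the QC accumulation
 * vector (vfp.qc, passed as the second offset to tcg_gen_gvec_4 below).
 */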
4843 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4844 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4846 static const TCGOpcode vecop_list[] = {
4847 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4849 static const GVecGen4 ops[4] = {
4850 { .fniv = gen_uqadd_vec,
4851 .fno = gen_helper_gvec_uqadd_b,
4852 .write_aofs = true,
4853 .opt_opc = vecop_list,
4854 .vece = MO_8 },
4855 { .fniv = gen_uqadd_vec,
4856 .fno = gen_helper_gvec_uqadd_h,
4857 .write_aofs = true,
4858 .opt_opc = vecop_list,
4859 .vece = MO_16 },
4860 { .fniv = gen_uqadd_vec,
4861 .fno = gen_helper_gvec_uqadd_s,
4862 .write_aofs = true,
4863 .opt_opc = vecop_list,
4864 .vece = MO_32 },
4865 { .fniv = gen_uqadd_vec,
4866 .fno = gen_helper_gvec_uqadd_d,
4867 .write_aofs = true,
4868 .opt_opc = vecop_list,
4869 .vece = MO_64 },
4871 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4872 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4875 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4876 TCGv_vec a, TCGv_vec b)
4878 TCGv_vec x = tcg_temp_new_vec_matching(t);
4879 tcg_gen_add_vec(vece, x, a, b);
4880 tcg_gen_ssadd_vec(vece, t, a, b);
4881 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4882 tcg_gen_or_vec(vece, sat, sat, x);
4883 tcg_temp_free_vec(x);
4886 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4887 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4889 static const TCGOpcode vecop_list[] = {
4890 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4892 static const GVecGen4 ops[4] = {
4893 { .fniv = gen_sqadd_vec,
4894 .fno = gen_helper_gvec_sqadd_b,
4895 .opt_opc = vecop_list,
4896 .write_aofs = true,
4897 .vece = MO_8 },
4898 { .fniv = gen_sqadd_vec,
4899 .fno = gen_helper_gvec_sqadd_h,
4900 .opt_opc = vecop_list,
4901 .write_aofs = true,
4902 .vece = MO_16 },
4903 { .fniv = gen_sqadd_vec,
4904 .fno = gen_helper_gvec_sqadd_s,
4905 .opt_opc = vecop_list,
4906 .write_aofs = true,
4907 .vece = MO_32 },
4908 { .fniv = gen_sqadd_vec,
4909 .fno = gen_helper_gvec_sqadd_d,
4910 .opt_opc = vecop_list,
4911 .write_aofs = true,
4912 .vece = MO_64 },
4914 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4915 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4918 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4919 TCGv_vec a, TCGv_vec b)
4921 TCGv_vec x = tcg_temp_new_vec_matching(t);
4922 tcg_gen_sub_vec(vece, x, a, b);
4923 tcg_gen_ussub_vec(vece, t, a, b);
4924 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4925 tcg_gen_or_vec(vece, sat, sat, x);
4926 tcg_temp_free_vec(x);
4929 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4930 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4932 static const TCGOpcode vecop_list[] = {
4933 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4935 static const GVecGen4 ops[4] = {
4936 { .fniv = gen_uqsub_vec,
4937 .fno = gen_helper_gvec_uqsub_b,
4938 .opt_opc = vecop_list,
4939 .write_aofs = true,
4940 .vece = MO_8 },
4941 { .fniv = gen_uqsub_vec,
4942 .fno = gen_helper_gvec_uqsub_h,
4943 .opt_opc = vecop_list,
4944 .write_aofs = true,
4945 .vece = MO_16 },
4946 { .fniv = gen_uqsub_vec,
4947 .fno = gen_helper_gvec_uqsub_s,
4948 .opt_opc = vecop_list,
4949 .write_aofs = true,
4950 .vece = MO_32 },
4951 { .fniv = gen_uqsub_vec,
4952 .fno = gen_helper_gvec_uqsub_d,
4953 .opt_opc = vecop_list,
4954 .write_aofs = true,
4955 .vece = MO_64 },
4957 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4958 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4961 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4962 TCGv_vec a, TCGv_vec b)
4964 TCGv_vec x = tcg_temp_new_vec_matching(t);
4965 tcg_gen_sub_vec(vece, x, a, b);
4966 tcg_gen_sssub_vec(vece, t, a, b);
4967 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4968 tcg_gen_or_vec(vece, sat, sat, x);
4969 tcg_temp_free_vec(x);
4972 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4973 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4975 static const TCGOpcode vecop_list[] = {
4976 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4978 static const GVecGen4 ops[4] = {
4979 { .fniv = gen_sqsub_vec,
4980 .fno = gen_helper_gvec_sqsub_b,
4981 .opt_opc = vecop_list,
4982 .write_aofs = true,
4983 .vece = MO_8 },
4984 { .fniv = gen_sqsub_vec,
4985 .fno = gen_helper_gvec_sqsub_h,
4986 .opt_opc = vecop_list,
4987 .write_aofs = true,
4988 .vece = MO_16 },
4989 { .fniv = gen_sqsub_vec,
4990 .fno = gen_helper_gvec_sqsub_s,
4991 .opt_opc = vecop_list,
4992 .write_aofs = true,
4993 .vece = MO_32 },
4994 { .fniv = gen_sqsub_vec,
4995 .fno = gen_helper_gvec_sqsub_d,
4996 .opt_opc = vecop_list,
4997 .write_aofs = true,
4998 .vece = MO_64 },
5000 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5001 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5004 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5006 TCGv_i32 t = tcg_temp_new_i32();
5008 tcg_gen_sub_i32(t, a, b);
5009 tcg_gen_sub_i32(d, b, a);
5010 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
5011 tcg_temp_free_i32(t);
5014 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5016 TCGv_i64 t = tcg_temp_new_i64();
5018 tcg_gen_sub_i64(t, a, b);
5019 tcg_gen_sub_i64(d, b, a);
5020 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
5021 tcg_temp_free_i64(t);
5024 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5026 TCGv_vec t = tcg_temp_new_vec_matching(d);
5028 tcg_gen_smin_vec(vece, t, a, b);
5029 tcg_gen_smax_vec(vece, d, a, b);
5030 tcg_gen_sub_vec(vece, d, d, t);
5031 tcg_temp_free_vec(t);
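/*
 * The vector form uses the identity |a - b| = max(a, b) - min(a, b);
 * the scalar forms above compute both a - b and b - a and use movcond
 * to keep whichever is non-negative.
 */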
5034 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5035 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5037 static const TCGOpcode vecop_list[] = {
5038 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5040 static const GVecGen3 ops[4] = {
5041 { .fniv = gen_sabd_vec,
5042 .fno = gen_helper_gvec_sabd_b,
5043 .opt_opc = vecop_list,
5044 .vece = MO_8 },
5045 { .fniv = gen_sabd_vec,
5046 .fno = gen_helper_gvec_sabd_h,
5047 .opt_opc = vecop_list,
5048 .vece = MO_16 },
5049 { .fni4 = gen_sabd_i32,
5050 .fniv = gen_sabd_vec,
5051 .fno = gen_helper_gvec_sabd_s,
5052 .opt_opc = vecop_list,
5053 .vece = MO_32 },
5054 { .fni8 = gen_sabd_i64,
5055 .fniv = gen_sabd_vec,
5056 .fno = gen_helper_gvec_sabd_d,
5057 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5058 .opt_opc = vecop_list,
5059 .vece = MO_64 },
5061 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5064 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5066 TCGv_i32 t = tcg_temp_new_i32();
5068 tcg_gen_sub_i32(t, a, b);
5069 tcg_gen_sub_i32(d, b, a);
5070 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5071 tcg_temp_free_i32(t);
5074 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5076 TCGv_i64 t = tcg_temp_new_i64();
5078 tcg_gen_sub_i64(t, a, b);
5079 tcg_gen_sub_i64(d, b, a);
5080 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5081 tcg_temp_free_i64(t);
5084 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5086 TCGv_vec t = tcg_temp_new_vec_matching(d);
5088 tcg_gen_umin_vec(vece, t, a, b);
5089 tcg_gen_umax_vec(vece, d, a, b);
5090 tcg_gen_sub_vec(vece, d, d, t);
5091 tcg_temp_free_vec(t);
5094 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5095 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5097 static const TCGOpcode vecop_list[] = {
5098 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5100 static const GVecGen3 ops[4] = {
5101 { .fniv = gen_uabd_vec,
5102 .fno = gen_helper_gvec_uabd_b,
5103 .opt_opc = vecop_list,
5104 .vece = MO_8 },
5105 { .fniv = gen_uabd_vec,
5106 .fno = gen_helper_gvec_uabd_h,
5107 .opt_opc = vecop_list,
5108 .vece = MO_16 },
5109 { .fni4 = gen_uabd_i32,
5110 .fniv = gen_uabd_vec,
5111 .fno = gen_helper_gvec_uabd_s,
5112 .opt_opc = vecop_list,
5113 .vece = MO_32 },
5114 { .fni8 = gen_uabd_i64,
5115 .fniv = gen_uabd_vec,
5116 .fno = gen_helper_gvec_uabd_d,
5117 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5118 .opt_opc = vecop_list,
5119 .vece = MO_64 },
5121 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5124 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5126 TCGv_i32 t = tcg_temp_new_i32();
5127 gen_sabd_i32(t, a, b);
5128 tcg_gen_add_i32(d, d, t);
5129 tcg_temp_free_i32(t);
5132 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5134 TCGv_i64 t = tcg_temp_new_i64();
5135 gen_sabd_i64(t, a, b);
5136 tcg_gen_add_i64(d, d, t);
5137 tcg_temp_free_i64(t);
5140 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5142 TCGv_vec t = tcg_temp_new_vec_matching(d);
5143 gen_sabd_vec(vece, t, a, b);
5144 tcg_gen_add_vec(vece, d, d, t);
5145 tcg_temp_free_vec(t);
5148 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5149 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5151 static const TCGOpcode vecop_list[] = {
5152 INDEX_op_sub_vec, INDEX_op_add_vec,
5153 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5155 static const GVecGen3 ops[4] = {
5156 { .fniv = gen_saba_vec,
5157 .fno = gen_helper_gvec_saba_b,
5158 .opt_opc = vecop_list,
5159 .load_dest = true,
5160 .vece = MO_8 },
5161 { .fniv = gen_saba_vec,
5162 .fno = gen_helper_gvec_saba_h,
5163 .opt_opc = vecop_list,
5164 .load_dest = true,
5165 .vece = MO_16 },
5166 { .fni4 = gen_saba_i32,
5167 .fniv = gen_saba_vec,
5168 .fno = gen_helper_gvec_saba_s,
5169 .opt_opc = vecop_list,
5170 .load_dest = true,
5171 .vece = MO_32 },
5172 { .fni8 = gen_saba_i64,
5173 .fniv = gen_saba_vec,
5174 .fno = gen_helper_gvec_saba_d,
5175 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5176 .opt_opc = vecop_list,
5177 .load_dest = true,
5178 .vece = MO_64 },
5180 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5183 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5185 TCGv_i32 t = tcg_temp_new_i32();
5186 gen_uabd_i32(t, a, b);
5187 tcg_gen_add_i32(d, d, t);
5188 tcg_temp_free_i32(t);
5191 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5193 TCGv_i64 t = tcg_temp_new_i64();
5194 gen_uabd_i64(t, a, b);
5195 tcg_gen_add_i64(d, d, t);
5196 tcg_temp_free_i64(t);
5199 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5201 TCGv_vec t = tcg_temp_new_vec_matching(d);
5202 gen_uabd_vec(vece, t, a, b);
5203 tcg_gen_add_vec(vece, d, d, t);
5204 tcg_temp_free_vec(t);
5207 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5208 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5210 static const TCGOpcode vecop_list[] = {
5211 INDEX_op_sub_vec, INDEX_op_add_vec,
5212 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5214 static const GVecGen3 ops[4] = {
5215 { .fniv = gen_uaba_vec,
5216 .fno = gen_helper_gvec_uaba_b,
5217 .opt_opc = vecop_list,
5218 .load_dest = true,
5219 .vece = MO_8 },
5220 { .fniv = gen_uaba_vec,
5221 .fno = gen_helper_gvec_uaba_h,
5222 .opt_opc = vecop_list,
5223 .load_dest = true,
5224 .vece = MO_16 },
5225 { .fni4 = gen_uaba_i32,
5226 .fniv = gen_uaba_vec,
5227 .fno = gen_helper_gvec_uaba_s,
5228 .opt_opc = vecop_list,
5229 .load_dest = true,
5230 .vece = MO_32 },
5231 { .fni8 = gen_uaba_i64,
5232 .fniv = gen_uaba_vec,
5233 .fno = gen_helper_gvec_uaba_d,
5234 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5235 .opt_opc = vecop_list,
5236 .load_dest = true,
5237 .vece = MO_64 },
5239 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5242 /* Translate a NEON data processing instruction. Return nonzero if the
5243 instruction is invalid.
5244 We process data in a mixture of 32-bit and 64-bit chunks.
5245 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5247 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5249 int op;
5250 int q;
5251 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5252 int size;
5253 int shift;
5254 int pass;
5255 int count;
5256 int u;
5257 int vec_size;
5258 uint32_t imm;
5259 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5260 TCGv_ptr ptr1, ptr2;
5261 TCGv_i64 tmp64;
5263 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5264 return 1;
5267 /* FIXME: this access check should not take precedence over UNDEF
5268 * for invalid encodings; we will generate incorrect syndrome information
5269 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5271 if (s->fp_excp_el) {
5272 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5273 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5274 return 0;
5277 if (!s->vfp_enabled)
5278 return 1;
5279 q = (insn & (1 << 6)) != 0;
5280 u = (insn >> 24) & 1;
5281 VFP_DREG_D(rd, insn);
5282 VFP_DREG_N(rn, insn);
5283 VFP_DREG_M(rm, insn);
5284 size = (insn >> 20) & 3;
5285 vec_size = q ? 16 : 8;
5286 rd_ofs = neon_reg_offset(rd, 0);
5287 rn_ofs = neon_reg_offset(rn, 0);
5288 rm_ofs = neon_reg_offset(rm, 0);
5290 if ((insn & (1 << 23)) == 0) {
5291 /* Three register same length: handled by decodetree */
5292 return 1;
5293 } else if (insn & (1 << 4)) {
5294 if ((insn & 0x00380080) != 0) {
5295 /* Two registers and shift. */
5296 op = (insn >> 8) & 0xf;
5297 if (insn & (1 << 7)) {
5298 /* 64-bit shift. */
5299 if (op > 7) {
5300 return 1;
5302 size = 3;
5303 } else {
5304 size = 2;
5305 while ((insn & (1 << (size + 19))) == 0)
5306 size--;
5308 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5309 if (op < 8) {
5310 /* Shift by immediate:
5311 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5312 if (q && ((rd | rm) & 1)) {
5313 return 1;
5315 if (!u && (op == 4 || op == 6)) {
5316 return 1;
5318 /* Right shifts are encoded as N - shift, where N is the
5319 element size in bits. */
5320 if (op <= 4) {
5321 shift = shift - (1 << (size + 3));
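/* For example, with size == 0 (8-bit elements) an immediate field
 * of 7 encodes a right shift of 8 - 7 == 1, so 'shift' becomes -1
 * here and is negated again in the cases below.
 */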
5324 switch (op) {
5325 case 0: /* VSHR */
5326 /* Right shift comes here negative. */
5327 shift = -shift;
5328 /* Shifts larger than the element size are architecturally
5329 * valid. Unsigned results in all zeros; signed results
5330 * in all sign bits.
5332 if (!u) {
5333 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5334 MIN(shift, (8 << size) - 1),
5335 vec_size, vec_size);
5336 } else if (shift >= 8 << size) {
5337 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5338 vec_size, 0);
5339 } else {
5340 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5341 vec_size, vec_size);
5343 return 0;
5345 case 1: /* VSRA */
5346 /* Right shift comes here negative. */
5347 shift = -shift;
5348 if (u) {
5349 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5350 vec_size, vec_size);
5351 } else {
5352 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5353 vec_size, vec_size);
5355 return 0;
5357 case 2: /* VRSHR */
5358 /* Right shift comes here negative. */
5359 shift = -shift;
5360 if (u) {
5361 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5362 vec_size, vec_size);
5363 } else {
5364 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5365 vec_size, vec_size);
5367 return 0;
5369 case 3: /* VRSRA */
5370 /* Right shift comes here negative. */
5371 shift = -shift;
5372 if (u) {
5373 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5374 vec_size, vec_size);
5375 } else {
5376 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5377 vec_size, vec_size);
5379 return 0;
5381 case 4: /* VSRI */
5382 if (!u) {
5383 return 1;
5385 /* Right shift comes here negative. */
5386 shift = -shift;
5387 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5388 vec_size, vec_size);
5389 return 0;
5391 case 5: /* VSHL, VSLI */
5392 if (u) { /* VSLI */
5393 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5394 vec_size, vec_size);
5395 } else { /* VSHL */
5396 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5397 vec_size, vec_size);
5399 return 0;
5402 if (size == 3) {
5403 count = q + 1;
5404 } else {
5405 count = q ? 4: 2;
5408 /* To avoid excessive duplication of ops we implement shift
5409 * by immediate using the variable shift operations.
5411 imm = dup_const(size, shift);
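/* e.g. for size == 0 (bytes) and shift == 3 this yields
 * imm == 0x03030303, so every byte lane of the variable-shift
 * helper sees the same left-shift count of 3.
 */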
5413 for (pass = 0; pass < count; pass++) {
5414 if (size == 3) {
5415 neon_load_reg64(cpu_V0, rm + pass);
5416 tcg_gen_movi_i64(cpu_V1, imm);
5417 switch (op) {
5418 case 6: /* VQSHLU */
5419 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5420 cpu_V0, cpu_V1);
5421 break;
5422 case 7: /* VQSHL */
5423 if (u) {
5424 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5425 cpu_V0, cpu_V1);
5426 } else {
5427 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5428 cpu_V0, cpu_V1);
5430 break;
5431 default:
5432 g_assert_not_reached();
5434 neon_store_reg64(cpu_V0, rd + pass);
5435 } else { /* size < 3 */
5436 /* Operands in tmp and tmp2. */
5437 tmp = neon_load_reg(rm, pass);
5438 tmp2 = tcg_temp_new_i32();
5439 tcg_gen_movi_i32(tmp2, imm);
5440 switch (op) {
5441 case 6: /* VQSHLU */
5442 switch (size) {
5443 case 0:
5444 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5445 tmp, tmp2);
5446 break;
5447 case 1:
5448 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5449 tmp, tmp2);
5450 break;
5451 case 2:
5452 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5453 tmp, tmp2);
5454 break;
5455 default:
5456 abort();
5458 break;
5459 case 7: /* VQSHL */
5460 GEN_NEON_INTEGER_OP_ENV(qshl);
5461 break;
5462 default:
5463 g_assert_not_reached();
5465 tcg_temp_free_i32(tmp2);
5466 neon_store_reg(rd, pass, tmp);
5468 } /* for pass */
5469 } else if (op < 10) {
5470 /* Shift by immediate and narrow:
5471 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5472 int input_unsigned = (op == 8) ? !u : u;
5473 if (rm & 1) {
5474 return 1;
5476 shift = shift - (1 << (size + 3));
5477 size++;
5478 if (size == 3) {
5479 tmp64 = tcg_const_i64(shift);
5480 neon_load_reg64(cpu_V0, rm);
5481 neon_load_reg64(cpu_V1, rm + 1);
5482 for (pass = 0; pass < 2; pass++) {
5483 TCGv_i64 in;
5484 if (pass == 0) {
5485 in = cpu_V0;
5486 } else {
5487 in = cpu_V1;
5489 if (q) {
5490 if (input_unsigned) {
5491 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5492 } else {
5493 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5495 } else {
5496 if (input_unsigned) {
5497 gen_ushl_i64(cpu_V0, in, tmp64);
5498 } else {
5499 gen_sshl_i64(cpu_V0, in, tmp64);
5502 tmp = tcg_temp_new_i32();
5503 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5504 neon_store_reg(rd, pass, tmp);
5505 } /* for pass */
5506 tcg_temp_free_i64(tmp64);
5507 } else {
5508 if (size == 1) {
5509 imm = (uint16_t)shift;
5510 imm |= imm << 16;
5511 } else {
5512 /* size == 2 */
5513 imm = (uint32_t)shift;
5515 tmp2 = tcg_const_i32(imm);
5516 tmp4 = neon_load_reg(rm + 1, 0);
5517 tmp5 = neon_load_reg(rm + 1, 1);
5518 for (pass = 0; pass < 2; pass++) {
5519 if (pass == 0) {
5520 tmp = neon_load_reg(rm, 0);
5521 } else {
5522 tmp = tmp4;
5524 gen_neon_shift_narrow(size, tmp, tmp2, q,
5525 input_unsigned);
5526 if (pass == 0) {
5527 tmp3 = neon_load_reg(rm, 1);
5528 } else {
5529 tmp3 = tmp5;
5531 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5532 input_unsigned);
5533 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5534 tcg_temp_free_i32(tmp);
5535 tcg_temp_free_i32(tmp3);
5536 tmp = tcg_temp_new_i32();
5537 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5538 neon_store_reg(rd, pass, tmp);
5539 } /* for pass */
5540 tcg_temp_free_i32(tmp2);
5542 } else if (op == 10) {
5543 /* VSHLL, VMOVL */
5544 if (q || (rd & 1)) {
5545 return 1;
5547 tmp = neon_load_reg(rm, 0);
5548 tmp2 = neon_load_reg(rm, 1);
5549 for (pass = 0; pass < 2; pass++) {
5550 if (pass == 1)
5551 tmp = tmp2;
5553 gen_neon_widen(cpu_V0, tmp, size, u);
5555 if (shift != 0) {
5556 /* The shift is less than the width of the source
5557 type, so we can just shift the whole register. */
5558 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5559 /* Widen the result of the shift: because we shift the whole
5560 * widened register, we must clear the potential overflow bits
5561 * resulting from the high bits of one narrow input appearing
5562 * as the low bits of the neighbouring (more significant)
5563 * widened element. */
5564 if (size < 2 || !u) {
5565 uint64_t imm64;
5566 if (size == 0) {
5567 imm = (0xffu >> (8 - shift));
5568 imm |= imm << 16;
5569 } else if (size == 1) {
5570 imm = 0xffff >> (16 - shift);
5571 } else {
5572 /* size == 2 */
5573 imm = 0xffffffff >> (32 - shift);
5575 if (size < 2) {
5576 imm64 = imm | (((uint64_t)imm) << 32);
5577 } else {
5578 imm64 = imm;
5580 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
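/* e.g. size == 0, signed input, shift == 3: imm64 is
 * 0x0007000700070007, so the low three bits of each widened
 * 16-bit lane (which would otherwise hold sign bits spilled
 * from the lane below) are cleared.
 */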
5583 neon_store_reg64(cpu_V0, rd + pass);
5585 } else if (op >= 14) {
5586 /* VCVT fixed-point. */
5587 TCGv_ptr fpst;
5588 TCGv_i32 shiftv;
5589 VFPGenFixPointFn *fn;
5591 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5592 return 1;
5595 if (!(op & 1)) {
5596 if (u) {
5597 fn = gen_helper_vfp_ultos;
5598 } else {
5599 fn = gen_helper_vfp_sltos;
5601 } else {
5602 if (u) {
5603 fn = gen_helper_vfp_touls_round_to_zero;
5604 } else {
5605 fn = gen_helper_vfp_tosls_round_to_zero;
5609 /* We have already masked out the must-be-1 top bit of imm6,
5610 * hence this 32-shift where the ARM ARM has 64-imm6.
5612 shift = 32 - shift;
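/* e.g. imm6 == 0b110000 (48): the low five bits give shift == 16,
 * and fracbits == 32 - 16 == 16, matching the ARM ARM's 64 - imm6.
 */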
5613 fpst = get_fpstatus_ptr(1);
5614 shiftv = tcg_const_i32(shift);
5615 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5616 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5617 fn(tmpf, tmpf, shiftv, fpst);
5618 neon_store_reg(rd, pass, tmpf);
5620 tcg_temp_free_ptr(fpst);
5621 tcg_temp_free_i32(shiftv);
5622 } else {
5623 return 1;
5625 } else { /* (insn & 0x00380080) == 0 */
5626 int invert, reg_ofs, vec_size;
5628 if (q && (rd & 1)) {
5629 return 1;
5632 op = (insn >> 8) & 0xf;
5633 /* One register and immediate. */
5634 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5635 invert = (insn & (1 << 5)) != 0;
5636 /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm == 0 is UNPREDICTABLE.
5637 * We choose not to special-case this and behave as if a
5638 * valid constant encoding of 0 had been given.
5640 switch (op) {
5641 case 0: case 1:
5642 /* no-op */
5643 break;
5644 case 2: case 3:
5645 imm <<= 8;
5646 break;
5647 case 4: case 5:
5648 imm <<= 16;
5649 break;
5650 case 6: case 7:
5651 imm <<= 24;
5652 break;
5653 case 8: case 9:
5654 imm |= imm << 16;
5655 break;
5656 case 10: case 11:
5657 imm = (imm << 8) | (imm << 24);
5658 break;
5659 case 12:
5660 imm = (imm << 8) | 0xff;
5661 break;
5662 case 13:
5663 imm = (imm << 16) | 0xffff;
5664 break;
5665 case 14:
5666 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5667 if (invert) {
5668 imm = ~imm;
5670 break;
5671 case 15:
5672 if (invert) {
5673 return 1;
5675 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5676 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5677 break;
5679 if (invert) {
5680 imm = ~imm;
5683 reg_ofs = neon_reg_offset(rd, 0);
5684 vec_size = q ? 16 : 8;
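/* A worked example of the expansion above: op == 12 with an 8-bit
 * immediate of 0xab gives imm == 0x0000abff (the "byte shifted left
 * by 8 with ones shifted in" pattern); since op 12 is even this is a
 * VMOV/VMVN form, and the (possibly already inverted, for VMVN)
 * value is replicated to every 32-bit lane by tcg_gen_gvec_dup_imm
 * below.
 */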
5686 if (op & 1 && op < 12) {
5687 if (invert) {
5688 /* The immediate value has already been inverted,
5689 * so BIC becomes AND.
5691 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5692 vec_size, vec_size);
5693 } else {
5694 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5695 vec_size, vec_size);
5697 } else {
5698 /* VMOV, VMVN. */
5699 if (op == 14 && invert) {
5700 TCGv_i64 t64 = tcg_temp_new_i64();
5702 for (pass = 0; pass <= q; ++pass) {
5703 uint64_t val = 0;
5704 int n;
5706 for (n = 0; n < 8; n++) {
5707 if (imm & (1 << (n + pass * 8))) {
5708 val |= 0xffull << (n * 8);
5711 tcg_gen_movi_i64(t64, val);
5712 neon_store_reg64(t64, rd + pass);
5714 tcg_temp_free_i64(t64);
5715 } else {
5716 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
5717 vec_size, imm);
5721 } else { /* (insn & 0x00800010 == 0x00800000) */
5722 if (size != 3) {
5723 op = (insn >> 8) & 0xf;
5724 if ((insn & (1 << 6)) == 0) {
5725 /* Three registers of different lengths. */
5726 int src1_wide;
5727 int src2_wide;
5728 int prewiden;
5729 /* undefreq: bit 0 : UNDEF if size == 0
5730 * bit 1 : UNDEF if size == 1
5731 * bit 2 : UNDEF if size == 2
5732 * bit 3 : UNDEF if U == 1
5733 * Note that [2:0] set implies 'always UNDEF'
5735 int undefreq;
5736 /* prewiden, src1_wide, src2_wide, undefreq */
5737 static const int neon_3reg_wide[16][4] = {
5738 {1, 0, 0, 0}, /* VADDL */
5739 {1, 1, 0, 0}, /* VADDW */
5740 {1, 0, 0, 0}, /* VSUBL */
5741 {1, 1, 0, 0}, /* VSUBW */
5742 {0, 1, 1, 0}, /* VADDHN */
5743 {0, 0, 0, 0}, /* VABAL */
5744 {0, 1, 1, 0}, /* VSUBHN */
5745 {0, 0, 0, 0}, /* VABDL */
5746 {0, 0, 0, 0}, /* VMLAL */
5747 {0, 0, 0, 9}, /* VQDMLAL */
5748 {0, 0, 0, 0}, /* VMLSL */
5749 {0, 0, 0, 9}, /* VQDMLSL */
5750 {0, 0, 0, 0}, /* Integer VMULL */
5751 {0, 0, 0, 9}, /* VQDMULL */
5752 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5753 {0, 0, 0, 7}, /* Reserved: always UNDEF */
5756 prewiden = neon_3reg_wide[op][0];
5757 src1_wide = neon_3reg_wide[op][1];
5758 src2_wide = neon_3reg_wide[op][2];
5759 undefreq = neon_3reg_wide[op][3];
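/* e.g. VQDMLAL has undefreq == 9 (bits 0 and 3): it is UNDEF for
 * size == 0 and for U == 1, which is exactly what the check below
 * tests.
 */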
5761 if ((undefreq & (1 << size)) ||
5762 ((undefreq & 8) && u)) {
5763 return 1;
5765 if ((src1_wide && (rn & 1)) ||
5766 (src2_wide && (rm & 1)) ||
5767 (!src2_wide && (rd & 1))) {
5768 return 1;
5771 /* Handle polynomial VMULL in a single pass. */
5772 if (op == 14) {
5773 if (size == 0) {
5774 /* VMULL.P8 */
5775 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5776 0, gen_helper_neon_pmull_h);
5777 } else {
5778 /* VMULL.P64 */
5779 if (!dc_isar_feature(aa32_pmull, s)) {
5780 return 1;
5782 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5783 0, gen_helper_gvec_pmull_q);
5785 return 0;
5788 /* Avoid overlapping operands. Wide source operands are
5789 always aligned so will never overlap with wide
5790 destinations in problematic ways. */
5791 if (rd == rm && !src2_wide) {
5792 tmp = neon_load_reg(rm, 1);
5793 neon_store_scratch(2, tmp);
5794 } else if (rd == rn && !src1_wide) {
5795 tmp = neon_load_reg(rn, 1);
5796 neon_store_scratch(2, tmp);
5798 tmp3 = NULL;
5799 for (pass = 0; pass < 2; pass++) {
5800 if (src1_wide) {
5801 neon_load_reg64(cpu_V0, rn + pass);
5802 tmp = NULL;
5803 } else {
5804 if (pass == 1 && rd == rn) {
5805 tmp = neon_load_scratch(2);
5806 } else {
5807 tmp = neon_load_reg(rn, pass);
5809 if (prewiden) {
5810 gen_neon_widen(cpu_V0, tmp, size, u);
5813 if (src2_wide) {
5814 neon_load_reg64(cpu_V1, rm + pass);
5815 tmp2 = NULL;
5816 } else {
5817 if (pass == 1 && rd == rm) {
5818 tmp2 = neon_load_scratch(2);
5819 } else {
5820 tmp2 = neon_load_reg(rm, pass);
5822 if (prewiden) {
5823 gen_neon_widen(cpu_V1, tmp2, size, u);
5826 switch (op) {
5827 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5828 gen_neon_addl(size);
5829 break;
5830 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5831 gen_neon_subl(size);
5832 break;
5833 case 5: case 7: /* VABAL, VABDL */
5834 switch ((size << 1) | u) {
5835 case 0:
5836 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5837 break;
5838 case 1:
5839 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5840 break;
5841 case 2:
5842 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5843 break;
5844 case 3:
5845 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5846 break;
5847 case 4:
5848 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5849 break;
5850 case 5:
5851 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5852 break;
5853 default: abort();
5855 tcg_temp_free_i32(tmp2);
5856 tcg_temp_free_i32(tmp);
5857 break;
5858 case 8: case 9: case 10: case 11: case 12: case 13:
5859 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5860 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5861 break;
5862 default: /* 15 is RESERVED: caught earlier */
5863 abort();
5865 if (op == 13) {
5866 /* VQDMULL */
5867 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5868 neon_store_reg64(cpu_V0, rd + pass);
5869 } else if (op == 5 || (op >= 8 && op <= 11)) {
5870 /* Accumulate. */
5871 neon_load_reg64(cpu_V1, rd + pass);
5872 switch (op) {
5873 case 10: /* VMLSL */
5874 gen_neon_negl(cpu_V0, size);
5875 /* Fall through */
5876 case 5: case 8: /* VABAL, VMLAL */
5877 gen_neon_addl(size);
5878 break;
5879 case 9: case 11: /* VQDMLAL, VQDMLSL */
5880 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5881 if (op == 11) {
5882 gen_neon_negl(cpu_V0, size);
5884 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5885 break;
5886 default:
5887 abort();
5889 neon_store_reg64(cpu_V0, rd + pass);
5890 } else if (op == 4 || op == 6) {
5891 /* Narrowing operation. */
5892 tmp = tcg_temp_new_i32();
5893 if (!u) {
5894 switch (size) {
5895 case 0:
5896 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5897 break;
5898 case 1:
5899 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5900 break;
5901 case 2:
5902 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5903 break;
5904 default: abort();
5906 } else {
5907 switch (size) {
5908 case 0:
5909 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5910 break;
5911 case 1:
5912 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5913 break;
5914 case 2:
5915 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5916 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5917 break;
5918 default: abort();
5921 if (pass == 0) {
5922 tmp3 = tmp;
5923 } else {
5924 neon_store_reg(rd, 0, tmp3);
5925 neon_store_reg(rd, 1, tmp);
5927 } else {
5928 /* Write back the result. */
5929 neon_store_reg64(cpu_V0, rd + pass);
5932 } else {
5933 /* Two registers and a scalar. NB that for ops of this form
5934 * the ARM ARM labels bit 24 as Q, but it is in our variable
5935 * 'u', not 'q'.
5937 if (size == 0) {
5938 return 1;
5940 switch (op) {
5941 case 1: /* Floating-point VMLA scalar */
5942 case 5: /* Floating-point VMLS scalar */
5943 case 9: /* Floating-point VMUL scalar */
5944 if (size == 1) {
5945 return 1;
5947 /* fall through */
5948 case 0: /* Integer VMLA scalar */
5949 case 4: /* Integer VMLS scalar */
5950 case 8: /* Integer VMUL scalar */
5951 case 12: /* VQDMULH scalar */
5952 case 13: /* VQRDMULH scalar */
5953 if (u && ((rd | rn) & 1)) {
5954 return 1;
5956 tmp = neon_get_scalar(size, rm);
5957 neon_store_scratch(0, tmp);
5958 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5959 tmp = neon_load_scratch(0);
5960 tmp2 = neon_load_reg(rn, pass);
5961 if (op == 12) {
5962 if (size == 1) {
5963 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5964 } else {
5965 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5967 } else if (op == 13) {
5968 if (size == 1) {
5969 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5970 } else {
5971 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5973 } else if (op & 1) {
5974 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5975 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5976 tcg_temp_free_ptr(fpstatus);
5977 } else {
5978 switch (size) {
5979 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5980 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5981 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5982 default: abort();
5985 tcg_temp_free_i32(tmp2);
5986 if (op < 8) {
5987 /* Accumulate. */
5988 tmp2 = neon_load_reg(rd, pass);
5989 switch (op) {
5990 case 0:
5991 gen_neon_add(size, tmp, tmp2);
5992 break;
5993 case 1:
5995 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5996 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5997 tcg_temp_free_ptr(fpstatus);
5998 break;
6000 case 4:
6001 gen_neon_rsb(size, tmp, tmp2);
6002 break;
6003 case 5:
6005 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6006 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6007 tcg_temp_free_ptr(fpstatus);
6008 break;
6010 default:
6011 abort();
6013 tcg_temp_free_i32(tmp2);
6015 neon_store_reg(rd, pass, tmp);
6017 break;
6018 case 3: /* VQDMLAL scalar */
6019 case 7: /* VQDMLSL scalar */
6020 case 11: /* VQDMULL scalar */
6021 if (u == 1) {
6022 return 1;
6024 /* fall through */
6025 case 2: /* VMLAL scalar */
6026 case 6: /* VMLSL scalar */
6027 case 10: /* VMULL scalar */
6028 if (rd & 1) {
6029 return 1;
6031 tmp2 = neon_get_scalar(size, rm);
6032 /* We need a copy of tmp2 because gen_neon_mull
6033 * frees it during pass 0. */
6034 tmp4 = tcg_temp_new_i32();
6035 tcg_gen_mov_i32(tmp4, tmp2);
6036 tmp3 = neon_load_reg(rn, 1);
6038 for (pass = 0; pass < 2; pass++) {
6039 if (pass == 0) {
6040 tmp = neon_load_reg(rn, 0);
6041 } else {
6042 tmp = tmp3;
6043 tmp2 = tmp4;
6045 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6046 if (op != 11) {
6047 neon_load_reg64(cpu_V1, rd + pass);
6049 switch (op) {
6050 case 6:
6051 gen_neon_negl(cpu_V0, size);
6052 /* Fall through */
6053 case 2:
6054 gen_neon_addl(size);
6055 break;
6056 case 3: case 7:
6057 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6058 if (op == 7) {
6059 gen_neon_negl(cpu_V0, size);
6061 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6062 break;
6063 case 10:
6064 /* no-op */
6065 break;
6066 case 11:
6067 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6068 break;
6069 default:
6070 abort();
6072 neon_store_reg64(cpu_V0, rd + pass);
6074 break;
6075 case 14: /* VQRDMLAH scalar */
6076 case 15: /* VQRDMLSH scalar */
6078 NeonGenThreeOpEnvFn *fn;
6080 if (!dc_isar_feature(aa32_rdm, s)) {
6081 return 1;
6083 if (u && ((rd | rn) & 1)) {
6084 return 1;
6086 if (op == 14) {
6087 if (size == 1) {
6088 fn = gen_helper_neon_qrdmlah_s16;
6089 } else {
6090 fn = gen_helper_neon_qrdmlah_s32;
6092 } else {
6093 if (size == 1) {
6094 fn = gen_helper_neon_qrdmlsh_s16;
6095 } else {
6096 fn = gen_helper_neon_qrdmlsh_s32;
6100 tmp2 = neon_get_scalar(size, rm);
6101 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6102 tmp = neon_load_reg(rn, pass);
6103 tmp3 = neon_load_reg(rd, pass);
6104 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6105 tcg_temp_free_i32(tmp3);
6106 neon_store_reg(rd, pass, tmp);
6108 tcg_temp_free_i32(tmp2);
6110 break;
6111 default:
6112 g_assert_not_reached();
6115 } else { /* size == 3 */
6116 if (!u) {
6117 /* Extract. */
6118 imm = (insn >> 8) & 0xf;
6120 if (imm > 7 && !q)
6121 return 1;
6123 if (q && ((rd | rn | rm) & 1)) {
6124 return 1;
6127 if (imm == 0) {
6128 neon_load_reg64(cpu_V0, rn);
6129 if (q) {
6130 neon_load_reg64(cpu_V1, rn + 1);
6132 } else if (imm == 8) {
6133 neon_load_reg64(cpu_V0, rn + 1);
6134 if (q) {
6135 neon_load_reg64(cpu_V1, rm);
6137 } else if (q) {
6138 tmp64 = tcg_temp_new_i64();
6139 if (imm < 8) {
6140 neon_load_reg64(cpu_V0, rn);
6141 neon_load_reg64(tmp64, rn + 1);
6142 } else {
6143 neon_load_reg64(cpu_V0, rn + 1);
6144 neon_load_reg64(tmp64, rm);
6146 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6147 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6148 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6149 if (imm < 8) {
6150 neon_load_reg64(cpu_V1, rm);
6151 } else {
6152 neon_load_reg64(cpu_V1, rm + 1);
6153 imm -= 8;
6155 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6156 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6157 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6158 tcg_temp_free_i64(tmp64);
6159 } else {
6160 /* BUGFIX */
6161 neon_load_reg64(cpu_V0, rn);
6162 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6163 neon_load_reg64(cpu_V1, rm);
6164 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6165 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6167 neon_store_reg64(cpu_V0, rd);
6168 if (q) {
6169 neon_store_reg64(cpu_V1, rd + 1);
6171 } else if ((insn & (1 << 11)) == 0) {
6172 /* Two register misc. */
6173 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6174 size = (insn >> 18) & 3;
6175 /* UNDEF for unknown op values and bad op-size combinations */
6176 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6177 return 1;
6179 if (neon_2rm_is_v8_op(op) &&
6180 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6181 return 1;
6183 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6184 q && ((rm | rd) & 1)) {
6185 return 1;
6187 switch (op) {
6188 case NEON_2RM_VREV64:
6189 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6190 tmp = neon_load_reg(rm, pass * 2);
6191 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6192 switch (size) {
6193 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6194 case 1: gen_swap_half(tmp); break;
6195 case 2: /* no-op */ break;
6196 default: abort();
6198 neon_store_reg(rd, pass * 2 + 1, tmp);
6199 if (size == 2) {
6200 neon_store_reg(rd, pass * 2, tmp2);
6201 } else {
6202 switch (size) {
6203 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6204 case 1: gen_swap_half(tmp2); break;
6205 default: abort();
6207 neon_store_reg(rd, pass * 2, tmp2);
6210 break;
6211 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6212 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6213 for (pass = 0; pass < q + 1; pass++) {
6214 tmp = neon_load_reg(rm, pass * 2);
6215 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6216 tmp = neon_load_reg(rm, pass * 2 + 1);
6217 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6218 switch (size) {
6219 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6220 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6221 case 2: tcg_gen_add_i64(CPU_V001); break;
6222 default: abort();
6224 if (op >= NEON_2RM_VPADAL) {
6225 /* Accumulate. */
6226 neon_load_reg64(cpu_V1, rd + pass);
6227 gen_neon_addl(size);
6229 neon_store_reg64(cpu_V0, rd + pass);
6231 break;
6232 case NEON_2RM_VTRN:
6233 if (size == 2) {
6234 int n;
6235 for (n = 0; n < (q ? 4 : 2); n += 2) {
6236 tmp = neon_load_reg(rm, n);
6237 tmp2 = neon_load_reg(rd, n + 1);
6238 neon_store_reg(rm, n, tmp2);
6239 neon_store_reg(rd, n + 1, tmp);
6241 } else {
6242 goto elementwise;
6244 break;
6245 case NEON_2RM_VUZP:
6246 if (gen_neon_unzip(rd, rm, size, q)) {
6247 return 1;
6249 break;
6250 case NEON_2RM_VZIP:
6251 if (gen_neon_zip(rd, rm, size, q)) {
6252 return 1;
6254 break;
6255 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6256 /* also VQMOVUN; op field and mnemonics don't line up */
6257 if (rm & 1) {
6258 return 1;
6260 tmp2 = NULL;
6261 for (pass = 0; pass < 2; pass++) {
6262 neon_load_reg64(cpu_V0, rm + pass);
6263 tmp = tcg_temp_new_i32();
6264 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6265 tmp, cpu_V0);
6266 if (pass == 0) {
6267 tmp2 = tmp;
6268 } else {
6269 neon_store_reg(rd, 0, tmp2);
6270 neon_store_reg(rd, 1, tmp);
6273 break;
6274 case NEON_2RM_VSHLL:
6275 if (q || (rd & 1)) {
6276 return 1;
6278 tmp = neon_load_reg(rm, 0);
6279 tmp2 = neon_load_reg(rm, 1);
6280 for (pass = 0; pass < 2; pass++) {
6281 if (pass == 1)
6282 tmp = tmp2;
6283 gen_neon_widen(cpu_V0, tmp, size, 1);
6284 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6285 neon_store_reg64(cpu_V0, rd + pass);
6287 break;
6288 case NEON_2RM_VCVT_F16_F32:
6290 TCGv_ptr fpst;
6291 TCGv_i32 ahp;
6293 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6294 q || (rm & 1)) {
6295 return 1;
6297 fpst = get_fpstatus_ptr(true);
6298 ahp = get_ahp_flag();
6299 tmp = neon_load_reg(rm, 0);
6300 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6301 tmp2 = neon_load_reg(rm, 1);
6302 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6303 tcg_gen_shli_i32(tmp2, tmp2, 16);
6304 tcg_gen_or_i32(tmp2, tmp2, tmp);
6305 tcg_temp_free_i32(tmp);
6306 tmp = neon_load_reg(rm, 2);
6307 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6308 tmp3 = neon_load_reg(rm, 3);
6309 neon_store_reg(rd, 0, tmp2);
6310 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6311 tcg_gen_shli_i32(tmp3, tmp3, 16);
6312 tcg_gen_or_i32(tmp3, tmp3, tmp);
6313 neon_store_reg(rd, 1, tmp3);
6314 tcg_temp_free_i32(tmp);
6315 tcg_temp_free_i32(ahp);
6316 tcg_temp_free_ptr(fpst);
6317 break;
6319 case NEON_2RM_VCVT_F32_F16:
6321 TCGv_ptr fpst;
6322 TCGv_i32 ahp;
6323 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6324 q || (rd & 1)) {
6325 return 1;
6327 fpst = get_fpstatus_ptr(true);
6328 ahp = get_ahp_flag();
6329 tmp3 = tcg_temp_new_i32();
6330 tmp = neon_load_reg(rm, 0);
6331 tmp2 = neon_load_reg(rm, 1);
6332 tcg_gen_ext16u_i32(tmp3, tmp);
6333 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6334 neon_store_reg(rd, 0, tmp3);
6335 tcg_gen_shri_i32(tmp, tmp, 16);
6336 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6337 neon_store_reg(rd, 1, tmp);
6338 tmp3 = tcg_temp_new_i32();
6339 tcg_gen_ext16u_i32(tmp3, tmp2);
6340 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6341 neon_store_reg(rd, 2, tmp3);
6342 tcg_gen_shri_i32(tmp2, tmp2, 16);
6343 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6344 neon_store_reg(rd, 3, tmp2);
6345 tcg_temp_free_i32(ahp);
6346 tcg_temp_free_ptr(fpst);
6347 break;
6349 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6350 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6351 return 1;
6353 ptr1 = vfp_reg_ptr(true, rd);
6354 ptr2 = vfp_reg_ptr(true, rm);
6356 /* Bit 6 is the lowest opcode bit; it distinguishes between
6357 * encryption (AESE/AESMC) and decryption (AESD/AESIMC) */
6359 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6361 if (op == NEON_2RM_AESE) {
6362 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6363 } else {
6364 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6366 tcg_temp_free_ptr(ptr1);
6367 tcg_temp_free_ptr(ptr2);
6368 tcg_temp_free_i32(tmp3);
6369 break;
6370 case NEON_2RM_SHA1H:
6371 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6372 return 1;
6374 ptr1 = vfp_reg_ptr(true, rd);
6375 ptr2 = vfp_reg_ptr(true, rm);
6377 gen_helper_crypto_sha1h(ptr1, ptr2);
6379 tcg_temp_free_ptr(ptr1);
6380 tcg_temp_free_ptr(ptr2);
6381 break;
6382 case NEON_2RM_SHA1SU1:
6383 if ((rm | rd) & 1) {
6384 return 1;
6386 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6387 if (q) {
6388 if (!dc_isar_feature(aa32_sha2, s)) {
6389 return 1;
6391 } else if (!dc_isar_feature(aa32_sha1, s)) {
6392 return 1;
6394 ptr1 = vfp_reg_ptr(true, rd);
6395 ptr2 = vfp_reg_ptr(true, rm);
6396 if (q) {
6397 gen_helper_crypto_sha256su0(ptr1, ptr2);
6398 } else {
6399 gen_helper_crypto_sha1su1(ptr1, ptr2);
6401 tcg_temp_free_ptr(ptr1);
6402 tcg_temp_free_ptr(ptr2);
6403 break;
6405 case NEON_2RM_VMVN:
6406 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6407 break;
6408 case NEON_2RM_VNEG:
6409 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6410 break;
6411 case NEON_2RM_VABS:
6412 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6413 break;
6415 case NEON_2RM_VCEQ0:
6416 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6417 break;
6418 case NEON_2RM_VCGT0:
6419 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6420 break;
6421 case NEON_2RM_VCLE0:
6422 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6423 break;
6424 case NEON_2RM_VCGE0:
6425 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6426 break;
6427 case NEON_2RM_VCLT0:
6428 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6429 break;
6431 default:
6432 elementwise:
6433 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6434 tmp = neon_load_reg(rm, pass);
6435 switch (op) {
6436 case NEON_2RM_VREV32:
6437 switch (size) {
6438 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6439 case 1: gen_swap_half(tmp); break;
6440 default: abort();
6442 break;
6443 case NEON_2RM_VREV16:
6444 gen_rev16(tmp, tmp);
6445 break;
6446 case NEON_2RM_VCLS:
6447 switch (size) {
6448 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6449 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6450 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6451 default: abort();
6453 break;
6454 case NEON_2RM_VCLZ:
6455 switch (size) {
6456 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6457 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6458 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6459 default: abort();
6461 break;
6462 case NEON_2RM_VCNT:
6463 gen_helper_neon_cnt_u8(tmp, tmp);
6464 break;
6465 case NEON_2RM_VQABS:
6466 switch (size) {
6467 case 0:
6468 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6469 break;
6470 case 1:
6471 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6472 break;
6473 case 2:
6474 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6475 break;
6476 default: abort();
6478 break;
6479 case NEON_2RM_VQNEG:
6480 switch (size) {
6481 case 0:
6482 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6483 break;
6484 case 1:
6485 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6486 break;
6487 case 2:
6488 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6489 break;
6490 default: abort();
6492 break;
6493 case NEON_2RM_VCGT0_F:
6495 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6496 tmp2 = tcg_const_i32(0);
6497 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6498 tcg_temp_free_i32(tmp2);
6499 tcg_temp_free_ptr(fpstatus);
6500 break;
6502 case NEON_2RM_VCGE0_F:
6504 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6505 tmp2 = tcg_const_i32(0);
6506 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6507 tcg_temp_free_i32(tmp2);
6508 tcg_temp_free_ptr(fpstatus);
6509 break;
6511 case NEON_2RM_VCEQ0_F:
6513 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6514 tmp2 = tcg_const_i32(0);
6515 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6516 tcg_temp_free_i32(tmp2);
6517 tcg_temp_free_ptr(fpstatus);
6518 break;
6520 case NEON_2RM_VCLE0_F:
6522 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6523 tmp2 = tcg_const_i32(0);
6524 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6525 tcg_temp_free_i32(tmp2);
6526 tcg_temp_free_ptr(fpstatus);
6527 break;
6529 case NEON_2RM_VCLT0_F:
6531 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6532 tmp2 = tcg_const_i32(0);
6533 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6534 tcg_temp_free_i32(tmp2);
6535 tcg_temp_free_ptr(fpstatus);
6536 break;
6538 case NEON_2RM_VABS_F:
6539 gen_helper_vfp_abss(tmp, tmp);
6540 break;
6541 case NEON_2RM_VNEG_F:
6542 gen_helper_vfp_negs(tmp, tmp);
6543 break;
6544 case NEON_2RM_VSWP:
6545 tmp2 = neon_load_reg(rd, pass);
6546 neon_store_reg(rm, pass, tmp2);
6547 break;
6548 case NEON_2RM_VTRN:
6549 tmp2 = neon_load_reg(rd, pass);
6550 switch (size) {
6551 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6552 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6553 default: abort();
6555 neon_store_reg(rm, pass, tmp2);
6556 break;
6557 case NEON_2RM_VRINTN:
6558 case NEON_2RM_VRINTA:
6559 case NEON_2RM_VRINTM:
6560 case NEON_2RM_VRINTP:
6561 case NEON_2RM_VRINTZ:
6563 TCGv_i32 tcg_rmode;
6564 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6565 int rmode;
6567 if (op == NEON_2RM_VRINTZ) {
6568 rmode = FPROUNDING_ZERO;
6569 } else {
6570 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6573 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6574 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6575 cpu_env);
6576 gen_helper_rints(tmp, tmp, fpstatus);
6577 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6578 cpu_env);
6579 tcg_temp_free_ptr(fpstatus);
6580 tcg_temp_free_i32(tcg_rmode);
6581 break;
6583 case NEON_2RM_VRINTX:
6585 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6586 gen_helper_rints_exact(tmp, tmp, fpstatus);
6587 tcg_temp_free_ptr(fpstatus);
6588 break;
6590 case NEON_2RM_VCVTAU:
6591 case NEON_2RM_VCVTAS:
6592 case NEON_2RM_VCVTNU:
6593 case NEON_2RM_VCVTNS:
6594 case NEON_2RM_VCVTPU:
6595 case NEON_2RM_VCVTPS:
6596 case NEON_2RM_VCVTMU:
6597 case NEON_2RM_VCVTMS:
6599 bool is_signed = !extract32(insn, 7, 1);
6600 TCGv_ptr fpst = get_fpstatus_ptr(1);
6601 TCGv_i32 tcg_rmode, tcg_shift;
6602 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6604 tcg_shift = tcg_const_i32(0);
6605 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6606 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6607 cpu_env);
6609 if (is_signed) {
6610 gen_helper_vfp_tosls(tmp, tmp,
6611 tcg_shift, fpst);
6612 } else {
6613 gen_helper_vfp_touls(tmp, tmp,
6614 tcg_shift, fpst);
6617 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6618 cpu_env);
6619 tcg_temp_free_i32(tcg_rmode);
6620 tcg_temp_free_i32(tcg_shift);
6621 tcg_temp_free_ptr(fpst);
6622 break;
6624 case NEON_2RM_VRECPE:
6625 gen_helper_recpe_u32(tmp, tmp);
6626 break;
6627 case NEON_2RM_VRSQRTE:
6628 gen_helper_rsqrte_u32(tmp, tmp);
6629 break;
6630 case NEON_2RM_VRECPE_F:
6632 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6633 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6634 tcg_temp_free_ptr(fpstatus);
6635 break;
6637 case NEON_2RM_VRSQRTE_F:
6639 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6640 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6641 tcg_temp_free_ptr(fpstatus);
6642 break;
6644 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6646 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6647 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6648 tcg_temp_free_ptr(fpstatus);
6649 break;
6651 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6653 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6654 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6655 tcg_temp_free_ptr(fpstatus);
6656 break;
6658 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6660 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6661 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6662 tcg_temp_free_ptr(fpstatus);
6663 break;
6665 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6667 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6668 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6669 tcg_temp_free_ptr(fpstatus);
6670 break;
6672 default:
6673 /* Reserved op values were caught by the
6674 * neon_2rm_sizes[] check earlier. */
6676 abort();
6678 neon_store_reg(rd, pass, tmp);
6680 break;
6682 } else if ((insn & (1 << 10)) == 0) {
6683 /* VTBL, VTBX. */
6684 int n = ((insn >> 8) & 3) + 1;
6685 if ((rn + n) > 32) {
6686 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6687 * helper function running off the end of the register file. */
6689 return 1;
6691 n <<= 3;
6692 if (insn & (1 << 6)) {
6693 tmp = neon_load_reg(rd, 0);
6694 } else {
6695 tmp = tcg_temp_new_i32();
6696 tcg_gen_movi_i32(tmp, 0);
6698 tmp2 = neon_load_reg(rm, 0);
6699 ptr1 = vfp_reg_ptr(true, rn);
6700 tmp5 = tcg_const_i32(n);
6701 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6702 tcg_temp_free_i32(tmp);
6703 if (insn & (1 << 6)) {
6704 tmp = neon_load_reg(rd, 1);
6705 } else {
6706 tmp = tcg_temp_new_i32();
6707 tcg_gen_movi_i32(tmp, 0);
6709 tmp3 = neon_load_reg(rm, 1);
6710 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6711 tcg_temp_free_i32(tmp5);
6712 tcg_temp_free_ptr(ptr1);
6713 neon_store_reg(rd, 0, tmp2);
6714 neon_store_reg(rd, 1, tmp3);
6715 tcg_temp_free_i32(tmp);
6716 } else if ((insn & 0x380) == 0) {
6717 /* VDUP */
6718 int element;
6719 MemOp size;
6721 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6722 return 1;
6724 if (insn & (1 << 16)) {
6725 size = MO_8;
6726 element = (insn >> 17) & 7;
6727 } else if (insn & (1 << 17)) {
6728 size = MO_16;
6729 element = (insn >> 18) & 3;
6730 } else {
6731 size = MO_32;
6732 element = (insn >> 19) & 1;
6734 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6735 neon_element_offset(rm, element, size),
6736 q ? 16 : 8, q ? 16 : 8);
6737 } else {
6738 return 1;
6742 return 0;
6745 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6747 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6748 const ARMCPRegInfo *ri;
6750 cpnum = (insn >> 8) & 0xf;
6752 /* First check for coprocessor space used for XScale/iwMMXt insns */
6753 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6754 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6755 return 1;
6757 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6758 return disas_iwmmxt_insn(s, insn);
6759 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6760 return disas_dsp_insn(s, insn);
6762 return 1;
6765 /* Otherwise treat as a generic register access */
6766 is64 = (insn & (1 << 25)) == 0;
6767 if (!is64 && ((insn & (1 << 4)) == 0)) {
6768 /* cdp */
6769 return 1;
6772 crm = insn & 0xf;
6773 if (is64) {
6774 crn = 0;
6775 opc1 = (insn >> 4) & 0xf;
6776 opc2 = 0;
6777 rt2 = (insn >> 16) & 0xf;
6778 } else {
6779 crn = (insn >> 16) & 0xf;
6780 opc1 = (insn >> 21) & 7;
6781 opc2 = (insn >> 5) & 7;
6782 rt2 = 0;
6784 isread = (insn >> 20) & 1;
6785 rt = (insn >> 12) & 0xf;
6787 ri = get_arm_cp_reginfo(s->cp_regs,
6788 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6789 if (ri) {
6790 bool need_exit_tb;
6792 /* Check access permissions */
6793 if (!cp_access_ok(s->current_el, ri, isread)) {
6794 return 1;
6797 if (s->hstr_active || ri->accessfn ||
6798 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6799 /* Emit code to perform further access permissions checks at
6800 * runtime; this may result in an exception.
6801 * Note that on XScale all cp0..c13 registers do an access check
6802 * call in order to handle c15_cpar. */
6804 TCGv_ptr tmpptr;
6805 TCGv_i32 tcg_syn, tcg_isread;
6806 uint32_t syndrome;
6808 /* Note that since we are an implementation which takes an
6809 * exception on a trapped conditional instruction only if the
6810 * instruction passes its condition code check, we can take
6811 * advantage of the clause in the ARM ARM that allows us to set
6812 * the COND field in the instruction to 0xE in all cases.
6813 * We could fish the actual condition out of the insn (ARM)
6814 * or the condexec bits (Thumb) but it isn't necessary. */
6816 switch (cpnum) {
6817 case 14:
6818 if (is64) {
6819 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6820 isread, false);
6821 } else {
6822 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6823 rt, isread, false);
6825 break;
6826 case 15:
6827 if (is64) {
6828 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6829 isread, false);
6830 } else {
6831 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6832 rt, isread, false);
6834 break;
6835 default:
6836 /* ARMv8 defines that only coprocessors 14 and 15 exist,
6837 * so this can only happen if this is an ARMv7 or earlier CPU,
6838 * in which case the syndrome information won't actually be
6839 * guest visible. */
6841 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6842 syndrome = syn_uncategorized();
6843 break;
6846 gen_set_condexec(s);
6847 gen_set_pc_im(s, s->pc_curr);
6848 tmpptr = tcg_const_ptr(ri);
6849 tcg_syn = tcg_const_i32(syndrome);
6850 tcg_isread = tcg_const_i32(isread);
6851 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6852 tcg_isread);
6853 tcg_temp_free_ptr(tmpptr);
6854 tcg_temp_free_i32(tcg_syn);
6855 tcg_temp_free_i32(tcg_isread);
6856 } else if (ri->type & ARM_CP_RAISES_EXC) {
6858 /* The readfn or writefn might raise an exception;
6859 * synchronize the CPU state in case it does. */
6861 gen_set_condexec(s);
6862 gen_set_pc_im(s, s->pc_curr);
6865 /* Handle special cases first */
6866 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6867 case ARM_CP_NOP:
6868 return 0;
6869 case ARM_CP_WFI:
6870 if (isread) {
6871 return 1;
6873 gen_set_pc_im(s, s->base.pc_next);
6874 s->base.is_jmp = DISAS_WFI;
6875 return 0;
6876 default:
6877 break;
6880 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
6881 gen_io_start();
6884 if (isread) {
6885 /* Read */
6886 if (is64) {
6887 TCGv_i64 tmp64;
6888 TCGv_i32 tmp;
6889 if (ri->type & ARM_CP_CONST) {
6890 tmp64 = tcg_const_i64(ri->resetvalue);
6891 } else if (ri->readfn) {
6892 TCGv_ptr tmpptr;
6893 tmp64 = tcg_temp_new_i64();
6894 tmpptr = tcg_const_ptr(ri);
6895 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6896 tcg_temp_free_ptr(tmpptr);
6897 } else {
6898 tmp64 = tcg_temp_new_i64();
6899 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
6901 tmp = tcg_temp_new_i32();
6902 tcg_gen_extrl_i64_i32(tmp, tmp64);
6903 store_reg(s, rt, tmp);
6904 tmp = tcg_temp_new_i32();
6905 tcg_gen_extrh_i64_i32(tmp, tmp64);
6906 tcg_temp_free_i64(tmp64);
6907 store_reg(s, rt2, tmp);
6908 } else {
6909 TCGv_i32 tmp;
6910 if (ri->type & ARM_CP_CONST) {
6911 tmp = tcg_const_i32(ri->resetvalue);
6912 } else if (ri->readfn) {
6913 TCGv_ptr tmpptr;
6914 tmp = tcg_temp_new_i32();
6915 tmpptr = tcg_const_ptr(ri);
6916 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
6917 tcg_temp_free_ptr(tmpptr);
6918 } else {
6919 tmp = load_cpu_offset(ri->fieldoffset);
6921 if (rt == 15) {
6922 /* Destination register of r15 for 32 bit loads sets
6923 * the condition codes from the high 4 bits of the value */
6925 gen_set_nzcv(tmp);
6926 tcg_temp_free_i32(tmp);
6927 } else {
6928 store_reg(s, rt, tmp);
6931 } else {
6932 /* Write */
6933 if (ri->type & ARM_CP_CONST) {
6934 /* If not forbidden by access permissions, treat as WI */
6935 return 0;
6938 if (is64) {
6939 TCGv_i32 tmplo, tmphi;
6940 TCGv_i64 tmp64 = tcg_temp_new_i64();
6941 tmplo = load_reg(s, rt);
6942 tmphi = load_reg(s, rt2);
6943 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
6944 tcg_temp_free_i32(tmplo);
6945 tcg_temp_free_i32(tmphi);
6946 if (ri->writefn) {
6947 TCGv_ptr tmpptr = tcg_const_ptr(ri);
6948 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
6949 tcg_temp_free_ptr(tmpptr);
6950 } else {
6951 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
6953 tcg_temp_free_i64(tmp64);
6954 } else {
6955 if (ri->writefn) {
6956 TCGv_i32 tmp;
6957 TCGv_ptr tmpptr;
6958 tmp = load_reg(s, rt);
6959 tmpptr = tcg_const_ptr(ri);
6960 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
6961 tcg_temp_free_ptr(tmpptr);
6962 tcg_temp_free_i32(tmp);
6963 } else {
6964 TCGv_i32 tmp = load_reg(s, rt);
6965 store_cpu_offset(tmp, ri->fieldoffset);
6970 /* I/O operations must end the TB here (whether read or write) */
6971 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
6972 (ri->type & ARM_CP_IO));
6974 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
6976 /* A write to any coprocessor register that ends a TB
6977 * must rebuild the hflags for the next TB. */
6979 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
6980 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6981 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
6982 } else {
6983 if (ri->type & ARM_CP_NEWEL) {
6984 gen_helper_rebuild_hflags_a32_newel(cpu_env);
6985 } else {
6986 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
6989 tcg_temp_free_i32(tcg_el);
6991 /* We default to ending the TB on a coprocessor register write,
6992 * but allow this to be suppressed by the register definition
6993 * (usually only necessary to work around guest bugs). */
6995 need_exit_tb = true;
6997 if (need_exit_tb) {
6998 gen_lookup_tb(s);
7001 return 0;
7004 /* Unknown register; this might be a guest error or a QEMU
7005 * unimplemented feature. */
7007 if (is64) {
7008 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7009 "64 bit system register cp:%d opc1: %d crm:%d "
7010 "(%s)\n",
7011 isread ? "read" : "write", cpnum, opc1, crm,
7012 s->ns ? "non-secure" : "secure");
7013 } else {
7014 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7015 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7016 "(%s)\n",
7017 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7018 s->ns ? "non-secure" : "secure");
7021 return 1;
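/*
 * Note on the return convention used above: disas_coproc_insn() returns 0
 * once the access has been handled (including the ARM_CP_NOP and
 * treat-as-write-ignored cases) and 1 to ask the caller to treat the
 * instruction as UNDEF, matching the other disas_* helpers in this file.
 */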
7025 /* Store a 64-bit value to a register pair. Clobbers val. */
7026 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7028 TCGv_i32 tmp;
7029 tmp = tcg_temp_new_i32();
7030 tcg_gen_extrl_i64_i32(tmp, val);
7031 store_reg(s, rlow, tmp);
7032 tmp = tcg_temp_new_i32();
7033 tcg_gen_extrh_i64_i32(tmp, val);
7034 store_reg(s, rhigh, tmp);
7037 /* load and add a 64-bit value from a register pair. */
7038 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7040 TCGv_i64 tmp;
7041 TCGv_i32 tmpl;
7042 TCGv_i32 tmph;
7044 /* Load 64-bit value rd:rn. */
7045 tmpl = load_reg(s, rlow);
7046 tmph = load_reg(s, rhigh);
7047 tmp = tcg_temp_new_i64();
7048 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7049 tcg_temp_free_i32(tmpl);
7050 tcg_temp_free_i32(tmph);
7051 tcg_gen_add_i64(val, val, tmp);
7052 tcg_temp_free_i64(tmp);
7055 /* Set N and Z flags from hi|lo. */
7056 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7058 tcg_gen_mov_i32(cpu_NF, hi);
7059 tcg_gen_or_i32(cpu_ZF, lo, hi);
7062 /* Load/Store exclusive instructions are implemented by remembering
7063 the value/address loaded, and seeing if these are the same
7064 when the store is performed. This should be sufficient to implement
7065 the architecturally mandated semantics, and avoids having to monitor
7066 regular stores. The compare vs the remembered value is done during
7067 the cmpxchg operation, but we must compare the addresses manually. */
7068 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7069 TCGv_i32 addr, int size)
7071 TCGv_i32 tmp = tcg_temp_new_i32();
7072 MemOp opc = size | MO_ALIGN | s->be_data;
7074 s->is_ldex = true;
7076 if (size == 3) {
7077 TCGv_i32 tmp2 = tcg_temp_new_i32();
7078 TCGv_i64 t64 = tcg_temp_new_i64();
7080 /* For AArch32, architecturally the 32-bit word at the lowest
7081 * address is always Rt and the one at addr+4 is Rt2, even if
7082 * the CPU is big-endian. That means we don't want to do a
7083 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7084 * for an architecturally 64-bit access, but instead do a
7085 * 64-bit access using MO_BE if appropriate and then split
7086 * the two halves.
7087 * This only makes a difference for BE32 user-mode, where
7088 * frob64() must not flip the two halves of the 64-bit data
7089 * but this code must treat BE32 user-mode like BE32 system. */
7091 TCGv taddr = gen_aa32_addr(s, addr, opc);
7093 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7094 tcg_temp_free(taddr);
7095 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7096 if (s->be_data == MO_BE) {
7097 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7098 } else {
7099 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7101 tcg_temp_free_i64(t64);
7103 store_reg(s, rt2, tmp2);
7104 } else {
7105 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7106 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7109 store_reg(s, rt, tmp);
7110 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7113 static void gen_clrex(DisasContext *s)
7115 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7118 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7119 TCGv_i32 addr, int size)
7121 TCGv_i32 t0, t1, t2;
7122 TCGv_i64 extaddr;
7123 TCGv taddr;
7124 TCGLabel *done_label;
7125 TCGLabel *fail_label;
7126 MemOp opc = size | MO_ALIGN | s->be_data;
7128 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7129 [addr] = {Rt};
7130 {Rd} = 0;
7131 } else {
7132 {Rd} = 1;
7133 } */
7134 fail_label = gen_new_label();
7135 done_label = gen_new_label();
7136 extaddr = tcg_temp_new_i64();
7137 tcg_gen_extu_i32_i64(extaddr, addr);
7138 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7139 tcg_temp_free_i64(extaddr);
7141 taddr = gen_aa32_addr(s, addr, opc);
7142 t0 = tcg_temp_new_i32();
7143 t1 = load_reg(s, rt);
7144 if (size == 3) {
7145 TCGv_i64 o64 = tcg_temp_new_i64();
7146 TCGv_i64 n64 = tcg_temp_new_i64();
7148 t2 = load_reg(s, rt2);
7149 /* For AArch32, architecturally the 32-bit word at the lowest
7150 * address is always Rt and the one at addr+4 is Rt2, even if
7151 * the CPU is big-endian. Since we're going to treat this as a
7152 * single 64-bit BE store, we need to put the two halves in the
7153 * opposite order for BE to LE, so that they end up in the right
7154 * places.
7155 * We don't want gen_aa32_frob64() because that does the wrong
7156 * thing for BE32 usermode. */
7158 if (s->be_data == MO_BE) {
7159 tcg_gen_concat_i32_i64(n64, t2, t1);
7160 } else {
7161 tcg_gen_concat_i32_i64(n64, t1, t2);
7163 tcg_temp_free_i32(t2);
7165 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7166 get_mem_index(s), opc);
7167 tcg_temp_free_i64(n64);
7169 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7170 tcg_gen_extrl_i64_i32(t0, o64);
7172 tcg_temp_free_i64(o64);
7173 } else {
7174 t2 = tcg_temp_new_i32();
7175 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7176 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7177 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7178 tcg_temp_free_i32(t2);
7180 tcg_temp_free_i32(t1);
7181 tcg_temp_free(taddr);
7182 tcg_gen_mov_i32(cpu_R[rd], t0);
7183 tcg_temp_free_i32(t0);
7184 tcg_gen_br(done_label);
7186 gen_set_label(fail_label);
7187 tcg_gen_movi_i32(cpu_R[rd], 1);
7188 gen_set_label(done_label);
7189 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7192 /* gen_srs:
7193 * @env: CPUARMState
7194 * @s: DisasContext
7195 * @mode: mode field from insn (which stack to store to)
7196 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7197 * @writeback: true if writeback bit set
7199 * Generate code for the SRS (Store Return State) insn. */
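/*
 * Implementation note (derived from the code below): the first amode
 * switch picks the offset at which LR is stored (SPSR then goes at that
 * address + 4); the second switch computes the writeback adjustment for
 * the banked SP, which works out to SP - 8 for the decrementing modes
 * (DA/DB) and SP + 8 for the incrementing modes (IA/IB).
 */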
7201 static void gen_srs(DisasContext *s,
7202 uint32_t mode, uint32_t amode, bool writeback)
7204 int32_t offset;
7205 TCGv_i32 addr, tmp;
7206 bool undef = false;
7208 /* SRS is:
7209 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7210 * and specified mode is monitor mode
7211 * - UNDEFINED in Hyp mode
7212 * - UNPREDICTABLE in User or System mode
7213 * - UNPREDICTABLE if the specified mode is:
7214 * -- not implemented
7215 * -- not a valid mode number
7216 * -- a mode that's at a higher exception level
7217 * -- Monitor, if we are Non-secure
7218 * For the UNPREDICTABLE cases we choose to UNDEF. */
7220 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7221 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7222 return;
7225 if (s->current_el == 0 || s->current_el == 2) {
7226 undef = true;
7229 switch (mode) {
7230 case ARM_CPU_MODE_USR:
7231 case ARM_CPU_MODE_FIQ:
7232 case ARM_CPU_MODE_IRQ:
7233 case ARM_CPU_MODE_SVC:
7234 case ARM_CPU_MODE_ABT:
7235 case ARM_CPU_MODE_UND:
7236 case ARM_CPU_MODE_SYS:
7237 break;
7238 case ARM_CPU_MODE_HYP:
7239 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7240 undef = true;
7242 break;
7243 case ARM_CPU_MODE_MON:
7244 /* No need to check specifically for "are we non-secure" because
7245 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7246 * so if this isn't EL3 then we must be non-secure. */
7248 if (s->current_el != 3) {
7249 undef = true;
7251 break;
7252 default:
7253 undef = true;
7256 if (undef) {
7257 unallocated_encoding(s);
7258 return;
7261 addr = tcg_temp_new_i32();
7262 tmp = tcg_const_i32(mode);
7263 /* get_r13_banked() will raise an exception if called from System mode */
7264 gen_set_condexec(s);
7265 gen_set_pc_im(s, s->pc_curr);
7266 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7267 tcg_temp_free_i32(tmp);
7268 switch (amode) {
7269 case 0: /* DA */
7270 offset = -4;
7271 break;
7272 case 1: /* IA */
7273 offset = 0;
7274 break;
7275 case 2: /* DB */
7276 offset = -8;
7277 break;
7278 case 3: /* IB */
7279 offset = 4;
7280 break;
7281 default:
7282 abort();
7284 tcg_gen_addi_i32(addr, addr, offset);
7285 tmp = load_reg(s, 14);
7286 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7287 tcg_temp_free_i32(tmp);
7288 tmp = load_cpu_field(spsr);
7289 tcg_gen_addi_i32(addr, addr, 4);
7290 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7291 tcg_temp_free_i32(tmp);
7292 if (writeback) {
7293 switch (amode) {
7294 case 0:
7295 offset = -8;
7296 break;
7297 case 1:
7298 offset = 4;
7299 break;
7300 case 2:
7301 offset = -4;
7302 break;
7303 case 3:
7304 offset = 0;
7305 break;
7306 default:
7307 abort();
7309 tcg_gen_addi_i32(addr, addr, offset);
7310 tmp = tcg_const_i32(mode);
7311 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7312 tcg_temp_free_i32(tmp);
7314 tcg_temp_free_i32(addr);
7315 s->base.is_jmp = DISAS_UPDATE;
7318 /* Generate a label used for skipping this instruction */
7319 static void arm_gen_condlabel(DisasContext *s)
7321 if (!s->condjmp) {
7322 s->condlabel = gen_new_label();
7323 s->condjmp = 1;
7327 /* Skip this instruction if the ARM condition is false */
7328 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7330 arm_gen_condlabel(s);
7331 arm_gen_test_cc(cond ^ 1, s->condlabel);
7336 /* Constant expanders for the decoders. */
7339 static int negate(DisasContext *s, int x)
7341 return -x;
7344 static int plus_2(DisasContext *s, int x)
7346 return x + 2;
7349 static int times_2(DisasContext *s, int x)
7351 return x * 2;
7354 static int times_4(DisasContext *s, int x)
7356 return x * 4;
7359 /* Return only the rotation part of T32ExpandImm. */
7360 static int t32_expandimm_rot(DisasContext *s, int x)
7362 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7365 /* Return the unrotated immediate from T32ExpandImm. */
7366 static int t32_expandimm_imm(DisasContext *s, int x)
7368 int imm = extract32(x, 0, 8);
7370 switch (extract32(x, 8, 4)) {
7371 case 0: /* XY */
7372 /* Nothing to do. */
7373 break;
7374 case 1: /* 00XY00XY */
7375 imm *= 0x00010001;
7376 break;
7377 case 2: /* XY00XY00 */
7378 imm *= 0x01000100;
7379 break;
7380 case 3: /* XYXYXYXY */
7381 imm *= 0x01010101;
7382 break;
7383 default:
7384 /* Rotated constant. */
7385 imm |= 0x80;
7386 break;
7388 return imm;
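/*
 * Worked examples for the two T32ExpandImm expanders above (derived from
 * the code, shown for reference):
 *   imm12 = 0x0ab -> imm 0x000000ab, rotation 0
 *   imm12 = 0x1ab -> imm 0x00ab00ab, rotation 0
 *   imm12 = 0x3ab -> imm 0xabababab, rotation 0
 *   imm12 = 0x4ff -> imm 0xff, rotation 9; the consumer of the immediate
 *                    (e.g. op_s_rri_rot) applies ror32(0xff, 9) == 0x7f800000.
 */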
7391 static int t32_branch24(DisasContext *s, int x)
7393 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
7394 x ^= !(x < 0) * (3 << 21);
7395 /* Append the final zero. */
7396 return x << 1;
7399 static int t16_setflags(DisasContext *s)
7401 return s->condexec_mask == 0;
7404 static int t16_push_list(DisasContext *s, int x)
7406 return (x & 0xff) | (x & 0x100) << (14 - 8);
7409 static int t16_pop_list(DisasContext *s, int x)
7411 return (x & 0xff) | (x & 0x100) << (15 - 8);
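/*
 * For example, a Thumb-1 PUSH/POP with register_list 0xff and the extra
 * bit set (x = 0x1ff) expands to 0x40ff for PUSH (r0-r7 plus lr) and
 * 0x80ff for POP (r0-r7 plus pc).
 */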
7415 /* Include the generated decoders. */
7418 #include "decode-a32.inc.c"
7419 #include "decode-a32-uncond.inc.c"
7420 #include "decode-t32.inc.c"
7421 #include "decode-t16.inc.c"
7423 /* Helpers to swap operands for reverse-subtract. */
7424 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7426 tcg_gen_sub_i32(dst, b, a);
7429 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7431 gen_sub_CC(dst, b, a);
7434 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7436 gen_sub_carry(dest, b, a);
7439 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7441 gen_sbc_CC(dest, b, a);
7445 /* Helpers for the data processing routines.
7447 * After the computation store the results back.
7448 * This may be suppressed altogether (STREG_NONE), require a runtime
7449 * check against the stack limits (STREG_SP_CHECK), or generate an
7450 * exception return. Oh, or store into a register.
7452 * Always return true, indicating success for a trans_* function. */
7454 typedef enum {
7455 STREG_NONE,
7456 STREG_NORMAL,
7457 STREG_SP_CHECK,
7458 STREG_EXC_RET,
7459 } StoreRegKind;
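/*
 * store_reg_kind() below dispatches on these values: STREG_NORMAL uses
 * store_reg()/store_reg_bx(), STREG_SP_CHECK routes through
 * store_sp_checked() (selected when the destination is SP), and
 * STREG_EXC_RET performs an exception return via gen_exception_return().
 */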
7461 static bool store_reg_kind(DisasContext *s, int rd,
7462 TCGv_i32 val, StoreRegKind kind)
7464 switch (kind) {
7465 case STREG_NONE:
7466 tcg_temp_free_i32(val);
7467 return true;
7468 case STREG_NORMAL:
7469 /* See ALUWritePC: Interworking only from a32 mode. */
7470 if (s->thumb) {
7471 store_reg(s, rd, val);
7472 } else {
7473 store_reg_bx(s, rd, val);
7475 return true;
7476 case STREG_SP_CHECK:
7477 store_sp_checked(s, val);
7478 return true;
7479 case STREG_EXC_RET:
7480 gen_exception_return(s, val);
7481 return true;
7483 g_assert_not_reached();
7487 /* Data Processing (register)
7489 * Operate, with set flags, one register source,
7490 * one immediate shifted register source, and a destination. */
7492 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7493 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7494 int logic_cc, StoreRegKind kind)
7496 TCGv_i32 tmp1, tmp2;
7498 tmp2 = load_reg(s, a->rm);
7499 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7500 tmp1 = load_reg(s, a->rn);
7502 gen(tmp1, tmp1, tmp2);
7503 tcg_temp_free_i32(tmp2);
7505 if (logic_cc) {
7506 gen_logic_CC(tmp1);
7508 return store_reg_kind(s, a->rd, tmp1, kind);
7511 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7512 void (*gen)(TCGv_i32, TCGv_i32),
7513 int logic_cc, StoreRegKind kind)
7515 TCGv_i32 tmp;
7517 tmp = load_reg(s, a->rm);
7518 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7520 gen(tmp, tmp);
7521 if (logic_cc) {
7522 gen_logic_CC(tmp);
7524 return store_reg_kind(s, a->rd, tmp, kind);
7528 /* Data-processing (register-shifted register)
7530 * Operate, with set flags, one register source,
7531 * one register shifted register source, and a destination. */
7533 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7534 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7535 int logic_cc, StoreRegKind kind)
7537 TCGv_i32 tmp1, tmp2;
7539 tmp1 = load_reg(s, a->rs);
7540 tmp2 = load_reg(s, a->rm);
7541 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7542 tmp1 = load_reg(s, a->rn);
7544 gen(tmp1, tmp1, tmp2);
7545 tcg_temp_free_i32(tmp2);
7547 if (logic_cc) {
7548 gen_logic_CC(tmp1);
7550 return store_reg_kind(s, a->rd, tmp1, kind);
7553 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7554 void (*gen)(TCGv_i32, TCGv_i32),
7555 int logic_cc, StoreRegKind kind)
7557 TCGv_i32 tmp1, tmp2;
7559 tmp1 = load_reg(s, a->rs);
7560 tmp2 = load_reg(s, a->rm);
7561 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7563 gen(tmp2, tmp2);
7564 if (logic_cc) {
7565 gen_logic_CC(tmp2);
7567 return store_reg_kind(s, a->rd, tmp2, kind);
7571 /* Data-processing (immediate)
7573 * Operate, with set flags, one register source,
7574 * one rotated immediate, and a destination.
7576 * Note that logic_cc && a->rot setting CF based on the msb of the
7577 * immediate is the reason why we must pass in the unrotated form
7578 * of the immediate. */
7580 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7581 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7582 int logic_cc, StoreRegKind kind)
7584 TCGv_i32 tmp1, tmp2;
7585 uint32_t imm;
7587 imm = ror32(a->imm, a->rot);
7588 if (logic_cc && a->rot) {
7589 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7591 tmp2 = tcg_const_i32(imm);
7592 tmp1 = load_reg(s, a->rn);
7594 gen(tmp1, tmp1, tmp2);
7595 tcg_temp_free_i32(tmp2);
7597 if (logic_cc) {
7598 gen_logic_CC(tmp1);
7600 return store_reg_kind(s, a->rd, tmp1, kind);
7603 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7604 void (*gen)(TCGv_i32, TCGv_i32),
7605 int logic_cc, StoreRegKind kind)
7607 TCGv_i32 tmp;
7608 uint32_t imm;
7610 imm = ror32(a->imm, a->rot);
7611 if (logic_cc && a->rot) {
7612 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7614 tmp = tcg_const_i32(imm);
7616 gen(tmp, tmp);
7617 if (logic_cc) {
7618 gen_logic_CC(tmp);
7620 return store_reg_kind(s, a->rd, tmp, kind);
7623 #define DO_ANY3(NAME, OP, L, K) \
7624 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7625 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7626 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7627 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7628 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7629 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7631 #define DO_ANY2(NAME, OP, L, K) \
7632 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7633 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7634 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7635 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7636 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7637 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7639 #define DO_CMP2(NAME, OP, L) \
7640 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7641 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7642 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7643 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7644 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7645 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
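/*
 * Each DO_ANY3 invocation below expands to three trans_* functions, e.g.
 * DO_ANY3(AND, ...) produces trans_AND_rrri, trans_AND_rrrr and
 * trans_AND_rri, wrapping op_s_rrr_shi(), op_s_rrr_shr() and
 * op_s_rri_rot() respectively; DO_ANY2 and DO_CMP2 follow the same
 * pattern for the single-source and compare-only forms.
 */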
7647 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7648 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7649 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7650 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7652 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7653 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7654 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7655 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7657 DO_CMP2(TST, tcg_gen_and_i32, true)
7658 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7659 DO_CMP2(CMN, gen_add_CC, false)
7660 DO_CMP2(CMP, gen_sub_CC, false)
7662 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7663 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7666 /* Note for the computation of StoreRegKind we return out of the
7667 * middle of the functions that are expanded by DO_ANY3, and that
7668 * we modify a->s via that parameter before it is used by OP. */
7670 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7672 StoreRegKind ret = STREG_NORMAL;
7673 if (a->rd == 15 && a->s) {
7675 /* See ALUExceptionReturn:
7676 * In User mode, UNPREDICTABLE; we choose UNDEF.
7677 * In Hyp mode, UNDEFINED. */
7679 if (IS_USER(s) || s->current_el == 2) {
7680 unallocated_encoding(s);
7681 return true;
7683 /* There is no writeback of nzcv to PSTATE. */
7684 a->s = 0;
7685 ret = STREG_EXC_RET;
7686 } else if (a->rd == 13 && a->rn == 13) {
7687 ret = STREG_SP_CHECK;
7689 ret;
7692 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7694 StoreRegKind ret = STREG_NORMAL;
7695 if (a->rd == 15 && a->s) {
7697 /* See ALUExceptionReturn:
7698 * In User mode, UNPREDICTABLE; we choose UNDEF.
7699 * In Hyp mode, UNDEFINED. */
7701 if (IS_USER(s) || s->current_el == 2) {
7702 unallocated_encoding(s);
7703 return true;
7705 /* There is no writeback of nzcv to PSTATE. */
7706 a->s = 0;
7707 ret = STREG_EXC_RET;
7708 } else if (a->rd == 13) {
7709 ret = STREG_SP_CHECK;
7711 ret;
7714 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7717 /* ORN is only available with T32, so there is no register-shifted-register
7718 * form of the insn. Using the DO_ANY3 macro would create an unused function. */
7720 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7722 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7725 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7727 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7730 #undef DO_ANY3
7731 #undef DO_ANY2
7732 #undef DO_CMP2
7734 static bool trans_ADR(DisasContext *s, arg_ri *a)
7736 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7737 return true;
7740 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7742 TCGv_i32 tmp;
7744 if (!ENABLE_ARCH_6T2) {
7745 return false;
7748 tmp = tcg_const_i32(a->imm);
7749 store_reg(s, a->rd, tmp);
7750 return true;
7753 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7755 TCGv_i32 tmp;
7757 if (!ENABLE_ARCH_6T2) {
7758 return false;
7761 tmp = load_reg(s, a->rd);
7762 tcg_gen_ext16u_i32(tmp, tmp);
7763 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7764 store_reg(s, a->rd, tmp);
7765 return true;
7769 /* Multiply and multiply accumulate */
7772 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7774 TCGv_i32 t1, t2;
7776 t1 = load_reg(s, a->rn);
7777 t2 = load_reg(s, a->rm);
7778 tcg_gen_mul_i32(t1, t1, t2);
7779 tcg_temp_free_i32(t2);
7780 if (add) {
7781 t2 = load_reg(s, a->ra);
7782 tcg_gen_add_i32(t1, t1, t2);
7783 tcg_temp_free_i32(t2);
7785 if (a->s) {
7786 gen_logic_CC(t1);
7788 store_reg(s, a->rd, t1);
7789 return true;
7792 static bool trans_MUL(DisasContext *s, arg_MUL *a)
7794 return op_mla(s, a, false);
7797 static bool trans_MLA(DisasContext *s, arg_MLA *a)
7799 return op_mla(s, a, true);
7802 static bool trans_MLS(DisasContext *s, arg_MLS *a)
7804 TCGv_i32 t1, t2;
7806 if (!ENABLE_ARCH_6T2) {
7807 return false;
7809 t1 = load_reg(s, a->rn);
7810 t2 = load_reg(s, a->rm);
7811 tcg_gen_mul_i32(t1, t1, t2);
7812 tcg_temp_free_i32(t2);
7813 t2 = load_reg(s, a->ra);
7814 tcg_gen_sub_i32(t1, t2, t1);
7815 tcg_temp_free_i32(t2);
7816 store_reg(s, a->rd, t1);
7817 return true;
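/*
 * op_mlal() below is the common handler for UMULL/SMULL/UMLAL/SMLAL: the
 * 64-bit product (optionally accumulated with the existing ra:rd value)
 * is written back with the low half in ra and the high half in rd.
 */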
7820 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7822 TCGv_i32 t0, t1, t2, t3;
7824 t0 = load_reg(s, a->rm);
7825 t1 = load_reg(s, a->rn);
7826 if (uns) {
7827 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7828 } else {
7829 tcg_gen_muls2_i32(t0, t1, t0, t1);
7831 if (add) {
7832 t2 = load_reg(s, a->ra);
7833 t3 = load_reg(s, a->rd);
7834 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7835 tcg_temp_free_i32(t2);
7836 tcg_temp_free_i32(t3);
7838 if (a->s) {
7839 gen_logicq_cc(t0, t1);
7841 store_reg(s, a->ra, t0);
7842 store_reg(s, a->rd, t1);
7843 return true;
7846 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7848 return op_mlal(s, a, true, false);
7851 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
7853 return op_mlal(s, a, false, false);
7856 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
7858 return op_mlal(s, a, true, true);
7861 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
7863 return op_mlal(s, a, false, true);
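/*
 * UMAAL: the 64-bit product of rn and rm plus the zero-extended values of
 * ra and rd, with the low half written back to ra and the high half to rd.
 */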
7866 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
7868 TCGv_i32 t0, t1, t2, zero;
7870 if (s->thumb
7871 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7872 : !ENABLE_ARCH_6) {
7873 return false;
7876 t0 = load_reg(s, a->rm);
7877 t1 = load_reg(s, a->rn);
7878 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7879 zero = tcg_const_i32(0);
7880 t2 = load_reg(s, a->ra);
7881 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7882 tcg_temp_free_i32(t2);
7883 t2 = load_reg(s, a->rd);
7884 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7885 tcg_temp_free_i32(t2);
7886 tcg_temp_free_i32(zero);
7887 store_reg(s, a->ra, t0);
7888 store_reg(s, a->rd, t1);
7889 return true;
7893 /* Saturating addition and subtraction */
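/*
 * op_qaddsub() implements QADD/QSUB as a saturating rm +/- rn; for
 * QDADD/QDSUB the doub flag first doubles rn with saturation before the
 * final saturating add or subtract.
 */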
7896 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
7898 TCGv_i32 t0, t1;
7900 if (s->thumb
7901 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7902 : !ENABLE_ARCH_5TE) {
7903 return false;
7906 t0 = load_reg(s, a->rm);
7907 t1 = load_reg(s, a->rn);
7908 if (doub) {
7909 gen_helper_add_saturate(t1, cpu_env, t1, t1);
7911 if (add) {
7912 gen_helper_add_saturate(t0, cpu_env, t0, t1);
7913 } else {
7914 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
7916 tcg_temp_free_i32(t1);
7917 store_reg(s, a->rd, t0);
7918 return true;
7921 #define DO_QADDSUB(NAME, ADD, DOUB) \
7922 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7924 return op_qaddsub(s, a, ADD, DOUB); \
7927 DO_QADDSUB(QADD, true, false)
7928 DO_QADDSUB(QSUB, false, false)
7929 DO_QADDSUB(QDADD, true, true)
7930 DO_QADDSUB(QDSUB, false, true)
7932 #undef DO_QADDSUB
7935 /* Halfword multiply and multiply accumulate */
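/*
 * In op_smlaxxx() below, add_long selects the accumulate form: 0 for the
 * SMULxy variants (no accumulate), 1 for SMLAxy (32-bit accumulate via
 * gen_helper_add_setq, which is expected to set the Q flag on overflow),
 * and 2 for SMLALxy (64-bit accumulate into ra:rd).
 */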
7938 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
7939 int add_long, bool nt, bool mt)
7941 TCGv_i32 t0, t1, tl, th;
7943 if (s->thumb
7944 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7945 : !ENABLE_ARCH_5TE) {
7946 return false;
7949 t0 = load_reg(s, a->rn);
7950 t1 = load_reg(s, a->rm);
7951 gen_mulxy(t0, t1, nt, mt);
7952 tcg_temp_free_i32(t1);
7954 switch (add_long) {
7955 case 0:
7956 store_reg(s, a->rd, t0);
7957 break;
7958 case 1:
7959 t1 = load_reg(s, a->ra);
7960 gen_helper_add_setq(t0, cpu_env, t0, t1);
7961 tcg_temp_free_i32(t1);
7962 store_reg(s, a->rd, t0);
7963 break;
7964 case 2:
7965 tl = load_reg(s, a->ra);
7966 th = load_reg(s, a->rd);
7967 /* Sign-extend the 32-bit product to 64 bits. */
7968 t1 = tcg_temp_new_i32();
7969 tcg_gen_sari_i32(t1, t0, 31);
7970 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
7971 tcg_temp_free_i32(t0);
7972 tcg_temp_free_i32(t1);
7973 store_reg(s, a->ra, tl);
7974 store_reg(s, a->rd, th);
7975 break;
7976 default:
7977 g_assert_not_reached();
7979 return true;
7982 #define DO_SMLAX(NAME, add, nt, mt) \
7983 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
7985 return op_smlaxxx(s, a, add, nt, mt); \
7988 DO_SMLAX(SMULBB, 0, 0, 0)
7989 DO_SMLAX(SMULBT, 0, 0, 1)
7990 DO_SMLAX(SMULTB, 0, 1, 0)
7991 DO_SMLAX(SMULTT, 0, 1, 1)
7993 DO_SMLAX(SMLABB, 1, 0, 0)
7994 DO_SMLAX(SMLABT, 1, 0, 1)
7995 DO_SMLAX(SMLATB, 1, 1, 0)
7996 DO_SMLAX(SMLATT, 1, 1, 1)
7998 DO_SMLAX(SMLALBB, 2, 0, 0)
7999 DO_SMLAX(SMLALBT, 2, 0, 1)
8000 DO_SMLAX(SMLALTB, 2, 1, 0)
8001 DO_SMLAX(SMLALTT, 2, 1, 1)
8003 #undef DO_SMLAX
8005 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8007 TCGv_i32 t0, t1;
8009 if (!ENABLE_ARCH_5TE) {
8010 return false;
8013 t0 = load_reg(s, a->rn);
8014 t1 = load_reg(s, a->rm);
8016 /* Since the nominal result is product<47:16>, shift the 16-bit
8017 * input up by 16 bits, so that the result is at product<63:32>. */
8019 if (mt) {
8020 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8021 } else {
8022 tcg_gen_shli_i32(t1, t1, 16);
8024 tcg_gen_muls2_i32(t0, t1, t0, t1);
8025 tcg_temp_free_i32(t0);
8026 if (add) {
8027 t0 = load_reg(s, a->ra);
8028 gen_helper_add_setq(t1, cpu_env, t1, t0);
8029 tcg_temp_free_i32(t0);
8031 store_reg(s, a->rd, t1);
8032 return true;
8035 #define DO_SMLAWX(NAME, add, mt) \
8036 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8038 return op_smlawx(s, a, add, mt); \
8041 DO_SMLAWX(SMULWB, 0, 0)
8042 DO_SMLAWX(SMULWT, 0, 1)
8043 DO_SMLAWX(SMLAWB, 1, 0)
8044 DO_SMLAWX(SMLAWT, 1, 1)
8046 #undef DO_SMLAWX
8049 /* MSR (immediate) and hints */
8052 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8055 /* When running single-threaded TCG code, use the helper to ensure that
8056 * the next round-robin scheduled vCPU gets a crack. When running in
8057 * MTTCG we don't generate jumps to the helper as it won't affect the
8058 * scheduling of other vCPUs. */
8060 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8061 gen_set_pc_im(s, s->base.pc_next);
8062 s->base.is_jmp = DISAS_YIELD;
8064 return true;
8067 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8070 /* When running single-threaded TCG code, use the helper to ensure that
8071 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8072 * just skip this instruction. Currently the SEV/SEVL instructions,
8073 * which are *one* of many ways to wake the CPU from WFE, are not
8074 * implemented so we can't sleep like WFI does. */
8076 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8077 gen_set_pc_im(s, s->base.pc_next);
8078 s->base.is_jmp = DISAS_WFE;
8080 return true;
8083 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8085 /* For WFI, halt the vCPU until an IRQ. */
8086 gen_set_pc_im(s, s->base.pc_next);
8087 s->base.is_jmp = DISAS_WFI;
8088 return true;
8091 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8093 return true;
8096 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8098 uint32_t val = ror32(a->imm, a->rot * 2);
8099 uint32_t mask = msr_mask(s, a->mask, a->r);
8101 if (gen_set_psr_im(s, mask, a->r, val)) {
8102 unallocated_encoding(s);
8104 return true;
8108 /* Cyclic Redundancy Check */
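/*
 * op_crc32() below zero-extends the rm input to the operand size and
 * passes the byte count (1 << sz) to the helper; the plain variants use
 * the CRC-32 helper and the CRC32C* variants the CRC-32C (Castagnoli)
 * helper.
 */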
8111 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8113 TCGv_i32 t1, t2, t3;
8115 if (!dc_isar_feature(aa32_crc32, s)) {
8116 return false;
8119 t1 = load_reg(s, a->rn);
8120 t2 = load_reg(s, a->rm);
8121 switch (sz) {
8122 case MO_8:
8123 gen_uxtb(t2);
8124 break;
8125 case MO_16:
8126 gen_uxth(t2);
8127 break;
8128 case MO_32:
8129 break;
8130 default:
8131 g_assert_not_reached();
8133 t3 = tcg_const_i32(1 << sz);
8134 if (c) {
8135 gen_helper_crc32c(t1, t1, t2, t3);
8136 } else {
8137 gen_helper_crc32(t1, t1, t2, t3);
8139 tcg_temp_free_i32(t2);
8140 tcg_temp_free_i32(t3);
8141 store_reg(s, a->rd, t1);
8142 return true;
8145 #define DO_CRC32(NAME, c, sz) \
8146 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8147 { return op_crc32(s, a, c, sz); }
8149 DO_CRC32(CRC32B, false, MO_8)
8150 DO_CRC32(CRC32H, false, MO_16)
8151 DO_CRC32(CRC32W, false, MO_32)
8152 DO_CRC32(CRC32CB, true, MO_8)
8153 DO_CRC32(CRC32CH, true, MO_16)
8154 DO_CRC32(CRC32CW, true, MO_32)
8156 #undef DO_CRC32
8159 /* Miscellaneous instructions */
8162 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8164 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8165 return false;
8167 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8168 return true;
8171 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8173 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8174 return false;
8176 gen_msr_banked(s, a->r, a->sysm, a->rn);
8177 return true;
8180 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8182 TCGv_i32 tmp;
8184 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8185 return false;
8187 if (a->r) {
8188 if (IS_USER(s)) {
8189 unallocated_encoding(s);
8190 return true;
8192 tmp = load_cpu_field(spsr);
8193 } else {
8194 tmp = tcg_temp_new_i32();
8195 gen_helper_cpsr_read(tmp, cpu_env);
8197 store_reg(s, a->rd, tmp);
8198 return true;
8201 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8203 TCGv_i32 tmp;
8204 uint32_t mask = msr_mask(s, a->mask, a->r);
8206 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8207 return false;
8209 tmp = load_reg(s, a->rn);
8210 if (gen_set_psr(s, mask, a->r, tmp)) {
8211 unallocated_encoding(s);
8213 return true;
8216 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8218 TCGv_i32 tmp;
8220 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8221 return false;
8223 tmp = tcg_const_i32(a->sysm);
8224 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8225 store_reg(s, a->rd, tmp);
8226 return true;
8229 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8231 TCGv_i32 addr, reg;
8233 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8234 return false;
8236 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8237 reg = load_reg(s, a->rn);
8238 gen_helper_v7m_msr(cpu_env, addr, reg);
8239 tcg_temp_free_i32(addr);
8240 tcg_temp_free_i32(reg);
8241 /* If we wrote to CONTROL, the EL might have changed */
8242 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8243 gen_lookup_tb(s);
8244 return true;
8247 static bool trans_BX(DisasContext *s, arg_BX *a)
8249 if (!ENABLE_ARCH_4T) {
8250 return false;
8252 gen_bx_excret(s, load_reg(s, a->rm));
8253 return true;
8256 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8258 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8259 return false;
8261 /* Trivial implementation equivalent to bx. */
8262 gen_bx(s, load_reg(s, a->rm));
8263 return true;
8266 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8268 TCGv_i32 tmp;
8270 if (!ENABLE_ARCH_5) {
8271 return false;
8273 tmp = load_reg(s, a->rm);
8274 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8275 gen_bx(s, tmp);
8276 return true;
8280 /* BXNS/BLXNS: only exist for v8M with the security extensions,
8281 * and always UNDEF if NonSecure. We don't implement these in
8282 * the user-only mode either (in theory you can use them from
8283 * Secure User mode but they are too tied in to system emulation). */
8285 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8287 if (!s->v8m_secure || IS_USER_ONLY) {
8288 unallocated_encoding(s);
8289 } else {
8290 gen_bxns(s, a->rm);
8292 return true;
8295 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8297 if (!s->v8m_secure || IS_USER_ONLY) {
8298 unallocated_encoding(s);
8299 } else {
8300 gen_blxns(s, a->rm);
8302 return true;
8305 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8307 TCGv_i32 tmp;
8309 if (!ENABLE_ARCH_5) {
8310 return false;
8312 tmp = load_reg(s, a->rm);
8313 tcg_gen_clzi_i32(tmp, tmp, 32);
8314 store_reg(s, a->rd, tmp);
8315 return true;
8318 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8320 TCGv_i32 tmp;
8322 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8323 return false;
8325 if (IS_USER(s)) {
8326 unallocated_encoding(s);
8327 return true;
8329 if (s->current_el == 2) {
8330 /* ERET from Hyp uses ELR_Hyp, not LR */
8331 tmp = load_cpu_field(elr_el[2]);
8332 } else {
8333 tmp = load_reg(s, 14);
8335 gen_exception_return(s, tmp);
8336 return true;
8339 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8341 gen_hlt(s, a->imm);
8342 return true;
8345 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8347 if (!ENABLE_ARCH_5) {
8348 return false;
8350 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8351 semihosting_enabled() &&
8352 #ifndef CONFIG_USER_ONLY
8353 !IS_USER(s) &&
8354 #endif
8355 (a->imm == 0xab)) {
8356 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8357 } else {
8358 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8360 return true;
8363 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8365 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8366 return false;
8368 if (IS_USER(s)) {
8369 unallocated_encoding(s);
8370 } else {
8371 gen_hvc(s, a->imm);
8373 return true;
8376 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8378 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8379 return false;
8381 if (IS_USER(s)) {
8382 unallocated_encoding(s);
8383 } else {
8384 gen_smc(s);
8386 return true;
8389 static bool trans_SG(DisasContext *s, arg_SG *a)
8391 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8392 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8393 return false;
8396 * SG (v8M only)
8397 * The bulk of the behaviour for this instruction is implemented
8398 * in v7m_handle_execute_nsc(), which deals with the insn when
8399 * it is executed by a CPU in non-secure state from memory
8400 * which is Secure & NonSecure-Callable.
8401 * Here we only need to handle the remaining cases:
8402 * * in NS memory (including the "security extension not
8403 * implemented" case) : NOP
8404 * * in S memory but CPU already secure (clear IT bits)
8405 * We know that the attribute for the memory this insn is
8406 * in must match the current CPU state, because otherwise
8407 * get_phys_addr_pmsav8 would have generated an exception.
8409 if (s->v8m_secure) {
8410 /* Like the IT insn, we don't need to generate any code */
8411 s->condexec_cond = 0;
8412 s->condexec_mask = 0;
8414 return true;
8417 static bool trans_TT(DisasContext *s, arg_TT *a)
8419 TCGv_i32 addr, tmp;
8421 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8422 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8423 return false;
8425 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8426 /* We UNDEF for these UNPREDICTABLE cases */
8427 unallocated_encoding(s);
8428 return true;
8430 if (a->A && !s->v8m_secure) {
8431 /* This case is UNDEFINED. */
8432 unallocated_encoding(s);
8433 return true;
8436 addr = load_reg(s, a->rn);
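/*
 * The low two bits passed to the helper select the TT variant:
 * bit 1 is A (alternate security state), bit 0 is T (unprivileged).
 */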
8437 tmp = tcg_const_i32((a->A << 1) | a->T);
8438 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8439 tcg_temp_free_i32(addr);
8440 store_reg(s, a->rd, tmp);
8441 return true;
8445 * Load/store register index
8448 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8450 ISSInfo ret;
8452 /* ISS not valid if writeback */
8453 if (p && !w) {
8454 ret = rd;
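/*
 * An insn length of 2 bytes means this was a 16-bit Thumb encoding,
 * which is reported in the syndrome via ISSIs16Bit.
 */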
8455 if (s->base.pc_next - s->pc_curr == 2) {
8456 ret |= ISSIs16Bit;
8458 } else {
8459 ret = ISSInvalid;
8461 return ret;
8464 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8466 TCGv_i32 addr = load_reg(s, a->rn);
8468 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8469 gen_helper_v8m_stackcheck(cpu_env, addr);
8472 if (a->p) {
8473 TCGv_i32 ofs = load_reg(s, a->rm);
8474 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8475 if (a->u) {
8476 tcg_gen_add_i32(addr, addr, ofs);
8477 } else {
8478 tcg_gen_sub_i32(addr, addr, ofs);
8480 tcg_temp_free_i32(ofs);
8482 return addr;
8485 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8486 TCGv_i32 addr, int address_offset)
8488 if (!a->p) {
8489 TCGv_i32 ofs = load_reg(s, a->rm);
8490 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8491 if (a->u) {
8492 tcg_gen_add_i32(addr, addr, ofs);
8493 } else {
8494 tcg_gen_sub_i32(addr, addr, ofs);
8496 tcg_temp_free_i32(ofs);
8497 } else if (!a->w) {
8498 tcg_temp_free_i32(addr);
8499 return;
8501 tcg_gen_addi_i32(addr, addr, address_offset);
8502 store_reg(s, a->rn, addr);
8505 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8506 MemOp mop, int mem_idx)
8508 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8509 TCGv_i32 addr, tmp;
8511 addr = op_addr_rr_pre(s, a);
8513 tmp = tcg_temp_new_i32();
8514 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8515 disas_set_da_iss(s, mop, issinfo);
8518 * Perform base writeback before the loaded value to
8519 * ensure correct behavior with overlapping index registers.
8521 op_addr_rr_post(s, a, addr, 0);
8522 store_reg_from_load(s, a->rt, tmp);
8523 return true;
8526 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8527 MemOp mop, int mem_idx)
8529 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8530 TCGv_i32 addr, tmp;
8532 addr = op_addr_rr_pre(s, a);
8534 tmp = load_reg(s, a->rt);
8535 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8536 disas_set_da_iss(s, mop, issinfo);
8537 tcg_temp_free_i32(tmp);
8539 op_addr_rr_post(s, a, addr, 0);
8540 return true;
8543 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8545 int mem_idx = get_mem_index(s);
8546 TCGv_i32 addr, tmp;
8548 if (!ENABLE_ARCH_5TE) {
8549 return false;
8551 if (a->rt & 1) {
8552 unallocated_encoding(s);
8553 return true;
8555 addr = op_addr_rr_pre(s, a);
8557 tmp = tcg_temp_new_i32();
8558 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8559 store_reg(s, a->rt, tmp);
8561 tcg_gen_addi_i32(addr, addr, 4);
8563 tmp = tcg_temp_new_i32();
8564 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8565 store_reg(s, a->rt + 1, tmp);
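/*
 * addr was advanced by 4 for the second word, so pass -4 below to undo
 * that before any base writeback.
 */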
8567 /* LDRD w/ base writeback is undefined if the registers overlap. */
8568 op_addr_rr_post(s, a, addr, -4);
8569 return true;
8572 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8574 int mem_idx = get_mem_index(s);
8575 TCGv_i32 addr, tmp;
8577 if (!ENABLE_ARCH_5TE) {
8578 return false;
8580 if (a->rt & 1) {
8581 unallocated_encoding(s);
8582 return true;
8584 addr = op_addr_rr_pre(s, a);
8586 tmp = load_reg(s, a->rt);
8587 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8588 tcg_temp_free_i32(tmp);
8590 tcg_gen_addi_i32(addr, addr, 4);
8592 tmp = load_reg(s, a->rt + 1);
8593 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8594 tcg_temp_free_i32(tmp);
8596 op_addr_rr_post(s, a, addr, -4);
8597 return true;
8601 * Load/store immediate index
8604 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8606 int ofs = a->imm;
8608 if (!a->u) {
8609 ofs = -ofs;
8612 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8614 * Stackcheck. Here we know 'addr' is the current SP;
8615 * U is set if we're moving SP up, else down. It is
8616 * UNKNOWN whether the limit check triggers when SP starts
8617 * below the limit and ends up above it; we chose to do so.
8619 if (!a->u) {
8620 TCGv_i32 newsp = tcg_temp_new_i32();
8621 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8622 gen_helper_v8m_stackcheck(cpu_env, newsp);
8623 tcg_temp_free_i32(newsp);
8624 } else {
8625 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
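/*
 * add_reg_for_lit() returns Rn plus the offset, using the word-aligned PC
 * when Rn is r15 (literal addressing); for post-indexed forms (!a->p) the
 * offset is applied later in op_addr_ri_post().
 */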
8629 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8632 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8633 TCGv_i32 addr, int address_offset)
8635 if (!a->p) {
8636 if (a->u) {
8637 address_offset += a->imm;
8638 } else {
8639 address_offset -= a->imm;
8641 } else if (!a->w) {
8642 tcg_temp_free_i32(addr);
8643 return;
8645 tcg_gen_addi_i32(addr, addr, address_offset);
8646 store_reg(s, a->rn, addr);
8649 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8650 MemOp mop, int mem_idx)
8652 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8653 TCGv_i32 addr, tmp;
8655 addr = op_addr_ri_pre(s, a);
8657 tmp = tcg_temp_new_i32();
8658 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8659 disas_set_da_iss(s, mop, issinfo);
8662 * Perform base writeback before the loaded value to
8663 * ensure correct behavior with overlapping index registers.
8665 op_addr_ri_post(s, a, addr, 0);
8666 store_reg_from_load(s, a->rt, tmp);
8667 return true;
8670 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8671 MemOp mop, int mem_idx)
8673 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8674 TCGv_i32 addr, tmp;
8676 addr = op_addr_ri_pre(s, a);
8678 tmp = load_reg(s, a->rt);
8679 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8680 disas_set_da_iss(s, mop, issinfo);
8681 tcg_temp_free_i32(tmp);
8683 op_addr_ri_post(s, a, addr, 0);
8684 return true;
8687 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8689 int mem_idx = get_mem_index(s);
8690 TCGv_i32 addr, tmp;
8692 addr = op_addr_ri_pre(s, a);
8694 tmp = tcg_temp_new_i32();
8695 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8696 store_reg(s, a->rt, tmp);
8698 tcg_gen_addi_i32(addr, addr, 4);
8700 tmp = tcg_temp_new_i32();
8701 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8702 store_reg(s, rt2, tmp);
8704 /* LDRD w/ base writeback is undefined if the registers overlap. */
8705 op_addr_ri_post(s, a, addr, -4);
8706 return true;
8709 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8711 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8712 return false;
8714 return op_ldrd_ri(s, a, a->rt + 1);
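/*
 * The T32 encoding carries Rt2 explicitly, whereas A32 LDRD requires an
 * even Rt and implies Rt+1; repack the t32 arguments into an arg_ldst_ri
 * and pass rt2 through separately.
 */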
8717 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8719 arg_ldst_ri b = {
8720 .u = a->u, .w = a->w, .p = a->p,
8721 .rn = a->rn, .rt = a->rt, .imm = a->imm
8723 return op_ldrd_ri(s, &b, a->rt2);
8726 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8728 int mem_idx = get_mem_index(s);
8729 TCGv_i32 addr, tmp;
8731 addr = op_addr_ri_pre(s, a);
8733 tmp = load_reg(s, a->rt);
8734 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8735 tcg_temp_free_i32(tmp);
8737 tcg_gen_addi_i32(addr, addr, 4);
8739 tmp = load_reg(s, rt2);
8740 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8741 tcg_temp_free_i32(tmp);
8743 op_addr_ri_post(s, a, addr, -4);
8744 return true;
8747 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8749 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8750 return false;
8752 return op_strd_ri(s, a, a->rt + 1);
8755 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8757 arg_ldst_ri b = {
8758 .u = a->u, .w = a->w, .p = a->p,
8759 .rn = a->rn, .rt = a->rt, .imm = a->imm
8761 return op_strd_ri(s, &b, a->rt2);
8764 #define DO_LDST(NAME, WHICH, MEMOP) \
8765 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8767 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
8769 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
8771 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
8773 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
8775 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
8777 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
8779 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
8782 DO_LDST(LDR, load, MO_UL)
8783 DO_LDST(LDRB, load, MO_UB)
8784 DO_LDST(LDRH, load, MO_UW)
8785 DO_LDST(LDRSB, load, MO_SB)
8786 DO_LDST(LDRSH, load, MO_SW)
8788 DO_LDST(STR, store, MO_UL)
8789 DO_LDST(STRB, store, MO_UB)
8790 DO_LDST(STRH, store, MO_UW)
8792 #undef DO_LDST
8795 * Synchronization primitives
8798 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8800 TCGv_i32 addr, tmp;
8801 TCGv taddr;
8803 opc |= s->be_data;
8804 addr = load_reg(s, a->rn);
8805 taddr = gen_aa32_addr(s, addr, opc);
8806 tcg_temp_free_i32(addr);
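/*
 * SWP{B} Rt, Rt2, [Rn] loads from [Rn] into Rt while storing Rt2 to the
 * same location; model it as a single atomic exchange.
 */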
8808 tmp = load_reg(s, a->rt2);
8809 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8810 tcg_temp_free(taddr);
8812 store_reg(s, a->rt, tmp);
8813 return true;
8816 static bool trans_SWP(DisasContext *s, arg_SWP *a)
8818 return op_swp(s, a, MO_UL | MO_ALIGN);
8821 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8823 return op_swp(s, a, MO_UB);
8827 * Load/Store Exclusive and Load-Acquire/Store-Release
8830 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8832 TCGv_i32 addr;
8833 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8834 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8836 /* We UNDEF for these UNPREDICTABLE cases. */
8837 if (a->rd == 15 || a->rn == 15 || a->rt == 15
8838 || a->rd == a->rn || a->rd == a->rt
8839 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
8840 || (mop == MO_64
8841 && (a->rt2 == 15
8842 || a->rd == a->rt2
8843 || (!v8a && s->thumb && a->rt2 == 13)))) {
8844 unallocated_encoding(s);
8845 return true;
8848 if (rel) {
8849 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
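/*
 * addr must be a local temp: gen_store_exclusive() emits branches for the
 * exclusive-address comparison, and ordinary TCG temps are not guaranteed
 * to hold their value across a branch.
 */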
8852 addr = tcg_temp_local_new_i32();
8853 load_reg_var(s, addr, a->rn);
8854 tcg_gen_addi_i32(addr, addr, a->imm);
8856 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8857 tcg_temp_free_i32(addr);
8858 return true;
8861 static bool trans_STREX(DisasContext *s, arg_STREX *a)
8863 if (!ENABLE_ARCH_6) {
8864 return false;
8866 return op_strex(s, a, MO_32, false);
8869 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
8871 if (!ENABLE_ARCH_6K) {
8872 return false;
8874 /* We UNDEF for these UNPREDICTABLE cases. */
8875 if (a->rt & 1) {
8876 unallocated_encoding(s);
8877 return true;
8879 a->rt2 = a->rt + 1;
8880 return op_strex(s, a, MO_64, false);
8883 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
8885 return op_strex(s, a, MO_64, false);
8888 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
8890 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8891 return false;
8893 return op_strex(s, a, MO_8, false);
8896 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
8898 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8899 return false;
8901 return op_strex(s, a, MO_16, false);
8904 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
8906 if (!ENABLE_ARCH_8) {
8907 return false;
8909 return op_strex(s, a, MO_32, true);
8912 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
8914 if (!ENABLE_ARCH_8) {
8915 return false;
8917 /* We UNDEF for these UNPREDICTABLE cases. */
8918 if (a->rt & 1) {
8919 unallocated_encoding(s);
8920 return true;
8922 a->rt2 = a->rt + 1;
8923 return op_strex(s, a, MO_64, true);
8926 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
8928 if (!ENABLE_ARCH_8) {
8929 return false;
8931 return op_strex(s, a, MO_64, true);
8934 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
8936 if (!ENABLE_ARCH_8) {
8937 return false;
8939 return op_strex(s, a, MO_8, true);
8942 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
8944 if (!ENABLE_ARCH_8) {
8945 return false;
8947 return op_strex(s, a, MO_16, true);
8950 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
8952 TCGv_i32 addr, tmp;
8954 if (!ENABLE_ARCH_8) {
8955 return false;
8957 /* We UNDEF for these UNPREDICTABLE cases. */
8958 if (a->rn == 15 || a->rt == 15) {
8959 unallocated_encoding(s);
8960 return true;
8963 addr = load_reg(s, a->rn);
8964 tmp = load_reg(s, a->rt);
8965 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8966 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8967 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
8969 tcg_temp_free_i32(tmp);
8970 tcg_temp_free_i32(addr);
8971 return true;
8974 static bool trans_STL(DisasContext *s, arg_STL *a)
8976 return op_stl(s, a, MO_UL);
8979 static bool trans_STLB(DisasContext *s, arg_STL *a)
8981 return op_stl(s, a, MO_UB);
8984 static bool trans_STLH(DisasContext *s, arg_STL *a)
8986 return op_stl(s, a, MO_UW);
8989 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
8991 TCGv_i32 addr;
8992 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8993 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8995 /* We UNDEF for these UNPREDICTABLE cases. */
8996 if (a->rn == 15 || a->rt == 15
8997 || (!v8a && s->thumb && a->rt == 13)
8998 || (mop == MO_64
8999 && (a->rt2 == 15 || a->rt == a->rt2
9000 || (!v8a && s->thumb && a->rt2 == 13)))) {
9001 unallocated_encoding(s);
9002 return true;
9005 addr = tcg_temp_local_new_i32();
9006 load_reg_var(s, addr, a->rn);
9007 tcg_gen_addi_i32(addr, addr, a->imm);
9009 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9010 tcg_temp_free_i32(addr);
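/*
 * For the acquire variants (LDAEX*), the barrier is emitted after the
 * load, mirroring the release barrier before the store in op_strex().
 */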
9012 if (acq) {
9013 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9015 return true;
9018 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9020 if (!ENABLE_ARCH_6) {
9021 return false;
9023 return op_ldrex(s, a, MO_32, false);
9026 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9028 if (!ENABLE_ARCH_6K) {
9029 return false;
9031 /* We UNDEF for these UNPREDICTABLE cases. */
9032 if (a->rt & 1) {
9033 unallocated_encoding(s);
9034 return true;
9036 a->rt2 = a->rt + 1;
9037 return op_ldrex(s, a, MO_64, false);
9040 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9042 return op_ldrex(s, a, MO_64, false);
9045 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9047 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9048 return false;
9050 return op_ldrex(s, a, MO_8, false);
9053 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9055 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9056 return false;
9058 return op_ldrex(s, a, MO_16, false);
9061 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9063 if (!ENABLE_ARCH_8) {
9064 return false;
9066 return op_ldrex(s, a, MO_32, true);
9069 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9071 if (!ENABLE_ARCH_8) {
9072 return false;
9074 /* We UNDEF for these UNPREDICTABLE cases. */
9075 if (a->rt & 1) {
9076 unallocated_encoding(s);
9077 return true;
9079 a->rt2 = a->rt + 1;
9080 return op_ldrex(s, a, MO_64, true);
9083 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9085 if (!ENABLE_ARCH_8) {
9086 return false;
9088 return op_ldrex(s, a, MO_64, true);
9091 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9093 if (!ENABLE_ARCH_8) {
9094 return false;
9096 return op_ldrex(s, a, MO_8, true);
9099 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9101 if (!ENABLE_ARCH_8) {
9102 return false;
9104 return op_ldrex(s, a, MO_16, true);
9107 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9109 TCGv_i32 addr, tmp;
9111 if (!ENABLE_ARCH_8) {
9112 return false;
9114 /* We UNDEF for these UNPREDICTABLE cases. */
9115 if (a->rn == 15 || a->rt == 15) {
9116 unallocated_encoding(s);
9117 return true;
9120 addr = load_reg(s, a->rn);
9121 tmp = tcg_temp_new_i32();
9122 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9123 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9124 tcg_temp_free_i32(addr);
9126 store_reg(s, a->rt, tmp);
9127 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9128 return true;
9131 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9133 return op_lda(s, a, MO_UL);
9136 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9138 return op_lda(s, a, MO_UB);
9141 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9143 return op_lda(s, a, MO_UW);
9147 * Media instructions
9150 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9152 TCGv_i32 t1, t2;
9154 if (!ENABLE_ARCH_6) {
9155 return false;
9158 t1 = load_reg(s, a->rn);
9159 t2 = load_reg(s, a->rm);
9160 gen_helper_usad8(t1, t1, t2);
9161 tcg_temp_free_i32(t2);
9162 if (a->ra != 15) {
9163 t2 = load_reg(s, a->ra);
9164 tcg_gen_add_i32(t1, t1, t2);
9165 tcg_temp_free_i32(t2);
9167 store_reg(s, a->rd, t1);
9168 return true;
9171 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9173 TCGv_i32 tmp;
9174 int width = a->widthm1 + 1;
9175 int shift = a->lsb;
9177 if (!ENABLE_ARCH_6T2) {
9178 return false;
9180 if (shift + width > 32) {
9181 /* UNPREDICTABLE; we choose to UNDEF */
9182 unallocated_encoding(s);
9183 return true;
9186 tmp = load_reg(s, a->rn);
9187 if (u) {
9188 tcg_gen_extract_i32(tmp, tmp, shift, width);
9189 } else {
9190 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9192 store_reg(s, a->rd, tmp);
9193 return true;
9196 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9198 return op_bfx(s, a, false);
9201 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9203 return op_bfx(s, a, true);
9206 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9208 TCGv_i32 tmp;
9209 int msb = a->msb, lsb = a->lsb;
9210 int width;
9212 if (!ENABLE_ARCH_6T2) {
9213 return false;
9215 if (msb < lsb) {
9216 /* UNPREDICTABLE; we choose to UNDEF */
9217 unallocated_encoding(s);
9218 return true;
9221 width = msb + 1 - lsb;
9222 if (a->rn == 15) {
9223 /* BFC */
9224 tmp = tcg_const_i32(0);
9225 } else {
9226 /* BFI */
9227 tmp = load_reg(s, a->rn);
9229 if (width != 32) {
9230 TCGv_i32 tmp2 = load_reg(s, a->rd);
9231 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9232 tcg_temp_free_i32(tmp2);
9234 store_reg(s, a->rd, tmp);
9235 return true;
9238 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9240 unallocated_encoding(s);
9241 return true;
9245 * Parallel addition and subtraction
9248 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9249 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9251 TCGv_i32 t0, t1;
9253 if (s->thumb
9254 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9255 : !ENABLE_ARCH_6) {
9256 return false;
9259 t0 = load_reg(s, a->rn);
9260 t1 = load_reg(s, a->rm);
9262 gen(t0, t0, t1);
9264 tcg_temp_free_i32(t1);
9265 store_reg(s, a->rd, t0);
9266 return true;
9269 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9270 void (*gen)(TCGv_i32, TCGv_i32,
9271 TCGv_i32, TCGv_ptr))
9273 TCGv_i32 t0, t1;
9274 TCGv_ptr ge;
9276 if (s->thumb
9277 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9278 : !ENABLE_ARCH_6) {
9279 return false;
9282 t0 = load_reg(s, a->rn);
9283 t1 = load_reg(s, a->rm);
9285 ge = tcg_temp_new_ptr();
9286 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9287 gen(t0, t0, t1, ge);
9289 tcg_temp_free_ptr(ge);
9290 tcg_temp_free_i32(t1);
9291 store_reg(s, a->rd, t0);
9292 return true;
9295 #define DO_PAR_ADDSUB(NAME, helper) \
9296 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9298 return op_par_addsub(s, a, helper); \
9301 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9302 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9304 return op_par_addsub_ge(s, a, helper); \
9307 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9308 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9309 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9310 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9311 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9312 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9314 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9315 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9316 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9317 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9318 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9319 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9321 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9322 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9323 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9324 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9325 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9326 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9328 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9329 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9330 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9331 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9332 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9333 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9335 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9336 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9337 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9338 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9339 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9340 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9342 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9343 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9344 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9345 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9346 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9347 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9349 #undef DO_PAR_ADDSUB
9350 #undef DO_PAR_ADDSUB_GE
9353 * Packing, unpacking, saturation, and reversal
9356 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9358 TCGv_i32 tn, tm;
9359 int shift = a->imm;
9361 if (s->thumb
9362 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9363 : !ENABLE_ARCH_6) {
9364 return false;
9367 tn = load_reg(s, a->rn);
9368 tm = load_reg(s, a->rm);
9369 if (a->tb) {
9370 /* PKHTB */
9371 if (shift == 0) {
9372 shift = 31;
9374 tcg_gen_sari_i32(tm, tm, shift);
9375 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9376 } else {
9377 /* PKHBT */
9378 tcg_gen_shli_i32(tm, tm, shift);
9379 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9381 tcg_temp_free_i32(tm);
9382 store_reg(s, a->rd, tn);
9383 return true;
9386 static bool op_sat(DisasContext *s, arg_sat *a,
9387 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9389 TCGv_i32 tmp, satimm;
9390 int shift = a->imm;
9392 if (!ENABLE_ARCH_6) {
9393 return false;
9396 tmp = load_reg(s, a->rn);
9397 if (a->sh) {
9398 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9399 } else {
9400 tcg_gen_shli_i32(tmp, tmp, shift);
9403 satimm = tcg_const_i32(a->satimm);
9404 gen(tmp, cpu_env, tmp, satimm);
9405 tcg_temp_free_i32(satimm);
9407 store_reg(s, a->rd, tmp);
9408 return true;
9411 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9413 return op_sat(s, a, gen_helper_ssat);
9416 static bool trans_USAT(DisasContext *s, arg_sat *a)
9418 return op_sat(s, a, gen_helper_usat);
9421 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9423 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9424 return false;
9426 return op_sat(s, a, gen_helper_ssat16);
9429 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9431 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9432 return false;
9434 return op_sat(s, a, gen_helper_usat16);
9437 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9438 void (*gen_extract)(TCGv_i32, TCGv_i32),
9439 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9441 TCGv_i32 tmp;
9443 if (!ENABLE_ARCH_6) {
9444 return false;
9447 tmp = load_reg(s, a->rm);
9449 * TODO: In many cases we could do a shift instead of a rotate.
9450 * Combined with a simple extend, that becomes an extract.
9452 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9453 gen_extract(tmp, tmp);
9455 if (a->rn != 15) {
9456 TCGv_i32 tmp2 = load_reg(s, a->rn);
9457 gen_add(tmp, tmp, tmp2);
9458 tcg_temp_free_i32(tmp2);
9460 store_reg(s, a->rd, tmp);
9461 return true;
9464 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9466 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9469 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9471 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9474 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9476 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9477 return false;
9479 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9482 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9484 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9487 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9489 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9492 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9494 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9495 return false;
9497 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9500 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9502 TCGv_i32 t1, t2, t3;
9504 if (s->thumb
9505 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9506 : !ENABLE_ARCH_6) {
9507 return false;
9510 t1 = load_reg(s, a->rn);
9511 t2 = load_reg(s, a->rm);
9512 t3 = tcg_temp_new_i32();
9513 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9514 gen_helper_sel_flags(t1, t3, t1, t2);
9515 tcg_temp_free_i32(t3);
9516 tcg_temp_free_i32(t2);
9517 store_reg(s, a->rd, t1);
9518 return true;
9521 static bool op_rr(DisasContext *s, arg_rr *a,
9522 void (*gen)(TCGv_i32, TCGv_i32))
9524 TCGv_i32 tmp;
9526 tmp = load_reg(s, a->rm);
9527 gen(tmp, tmp);
9528 store_reg(s, a->rd, tmp);
9529 return true;
9532 static bool trans_REV(DisasContext *s, arg_rr *a)
9534 if (!ENABLE_ARCH_6) {
9535 return false;
9537 return op_rr(s, a, tcg_gen_bswap32_i32);
9540 static bool trans_REV16(DisasContext *s, arg_rr *a)
9542 if (!ENABLE_ARCH_6) {
9543 return false;
9545 return op_rr(s, a, gen_rev16);
9548 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9550 if (!ENABLE_ARCH_6) {
9551 return false;
9553 return op_rr(s, a, gen_revsh);
9556 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9558 if (!ENABLE_ARCH_6T2) {
9559 return false;
9561 return op_rr(s, a, gen_helper_rbit);
9565 * Signed multiply, signed and unsigned divide
9568 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9570 TCGv_i32 t1, t2;
9572 if (!ENABLE_ARCH_6) {
9573 return false;
9576 t1 = load_reg(s, a->rn);
9577 t2 = load_reg(s, a->rm);
9578 if (m_swap) {
9579 gen_swap_half(t2);
9581 gen_smul_dual(t1, t2);
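/*
 * t1 now holds the signed product of the low halves and t2 that of the
 * high halves; the X variants swapped Rm's halves above, turning these
 * into the cross products.
 */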
9583 if (sub) {
9584 /* This subtraction cannot overflow. */
9585 tcg_gen_sub_i32(t1, t1, t2);
9586 } else {
9588 * This addition cannot overflow 32 bits; however it may
9589 * overflow when considered as a signed operation, in which case
9590 * we must set the Q flag.
9592 gen_helper_add_setq(t1, cpu_env, t1, t2);
9594 tcg_temp_free_i32(t2);
9596 if (a->ra != 15) {
9597 t2 = load_reg(s, a->ra);
9598 gen_helper_add_setq(t1, cpu_env, t1, t2);
9599 tcg_temp_free_i32(t2);
9601 store_reg(s, a->rd, t1);
9602 return true;
9605 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9607 return op_smlad(s, a, false, false);
9610 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9612 return op_smlad(s, a, true, false);
9615 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9617 return op_smlad(s, a, false, true);
9620 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9622 return op_smlad(s, a, true, true);
9625 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9627 TCGv_i32 t1, t2;
9628 TCGv_i64 l1, l2;
9630 if (!ENABLE_ARCH_6) {
9631 return false;
9634 t1 = load_reg(s, a->rn);
9635 t2 = load_reg(s, a->rm);
9636 if (m_swap) {
9637 gen_swap_half(t2);
9639 gen_smul_dual(t1, t2);
9641 l1 = tcg_temp_new_i64();
9642 l2 = tcg_temp_new_i64();
9643 tcg_gen_ext_i32_i64(l1, t1);
9644 tcg_gen_ext_i32_i64(l2, t2);
9645 tcg_temp_free_i32(t1);
9646 tcg_temp_free_i32(t2);
9648 if (sub) {
9649 tcg_gen_sub_i64(l1, l1, l2);
9650 } else {
9651 tcg_gen_add_i64(l1, l1, l2);
9653 tcg_temp_free_i64(l2);
9655 gen_addq(s, l1, a->ra, a->rd);
9656 gen_storeq_reg(s, a->ra, a->rd, l1);
9657 tcg_temp_free_i64(l1);
9658 return true;
9661 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9663 return op_smlald(s, a, false, false);
9666 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9668 return op_smlald(s, a, true, false);
9671 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9673 return op_smlald(s, a, false, true);
9676 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9678 return op_smlald(s, a, true, true);
9681 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9683 TCGv_i32 t1, t2;
9685 if (s->thumb
9686 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9687 : !ENABLE_ARCH_6) {
9688 return false;
9691 t1 = load_reg(s, a->rn);
9692 t2 = load_reg(s, a->rm);
9693 tcg_gen_muls2_i32(t2, t1, t1, t2);
9695 if (a->ra != 15) {
9696 TCGv_i32 t3 = load_reg(s, a->ra);
9697 if (sub) {
9699 * For SMMLS, we need a 64-bit subtract: the borrow is caused
9700 * by a non-zero multiplicand lowpart, and we also need the
9701 * correct result lowpart for rounding.
9703 TCGv_i32 zero = tcg_const_i32(0);
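/*
 * Compute {t1:t2} = (Ra << 32) - {t1:t2}: t1 then holds the wanted high
 * word and t2 the low word used for rounding below.
 */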
9704 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9705 tcg_temp_free_i32(zero);
9706 } else {
9707 tcg_gen_add_i32(t1, t1, t3);
9709 tcg_temp_free_i32(t3);
9711 if (round) {
9713 * Adding 0x80000000 to the 64-bit quantity means that we have
9714 * carry in to the high word when the low word has the msb set.
9716 tcg_gen_shri_i32(t2, t2, 31);
9717 tcg_gen_add_i32(t1, t1, t2);
9719 tcg_temp_free_i32(t2);
9720 store_reg(s, a->rd, t1);
9721 return true;
9724 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9726 return op_smmla(s, a, false, false);
9729 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9731 return op_smmla(s, a, true, false);
9734 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9736 return op_smmla(s, a, false, true);
9739 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9741 return op_smmla(s, a, true, true);
9744 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9746 TCGv_i32 t1, t2;
9748 if (s->thumb
9749 ? !dc_isar_feature(aa32_thumb_div, s)
9750 : !dc_isar_feature(aa32_arm_div, s)) {
9751 return false;
9754 t1 = load_reg(s, a->rn);
9755 t2 = load_reg(s, a->rm);
9756 if (u) {
9757 gen_helper_udiv(t1, t1, t2);
9758 } else {
9759 gen_helper_sdiv(t1, t1, t2);
9761 tcg_temp_free_i32(t2);
9762 store_reg(s, a->rd, t1);
9763 return true;
9766 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9768 return op_div(s, a, false);
9771 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9773 return op_div(s, a, true);
9777 * Block data transfer
9780 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9782 TCGv_i32 addr = load_reg(s, a->rn);
9784 if (a->b) {
9785 if (a->i) {
9786 /* pre increment */
9787 tcg_gen_addi_i32(addr, addr, 4);
9788 } else {
9789 /* pre decrement */
9790 tcg_gen_addi_i32(addr, addr, -(n * 4));
9792 } else if (!a->i && n != 1) {
9793 /* post decrement */
9794 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9797 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9799 * If the writeback is incrementing SP rather than
9800 * decrementing it, and the initial SP is below the
9801 * stack limit but the final written-back SP would
9802 * be above, then we must not perform any memory
9803 * accesses, but it is IMPDEF whether we generate
9804 * an exception. We choose to do so in this case.
9805 * At this point 'addr' is the lowest address: either
9806 * the original SP (if incrementing) or our final SP
9807 * (if decrementing), so that's what we check.
9809 gen_helper_v8m_stackcheck(cpu_env, addr);
9812 return addr;
9815 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9816 TCGv_i32 addr, int n)
9818 if (a->w) {
9819 /* write back */
9820 if (!a->b) {
9821 if (a->i) {
9822 /* post increment */
9823 tcg_gen_addi_i32(addr, addr, 4);
9824 } else {
9825 /* post decrement */
9826 tcg_gen_addi_i32(addr, addr, -(n * 4));
9828 } else if (!a->i && n != 1) {
9829 /* pre decrement */
9830 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9832 store_reg(s, a->rn, addr);
9833 } else {
9834 tcg_temp_free_i32(addr);
9838 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9840 int i, j, n, list, mem_idx;
9841 bool user = a->u;
9842 TCGv_i32 addr, tmp, tmp2;
9844 if (user) {
9845 /* STM (user) */
9846 if (IS_USER(s)) {
9847 /* Only usable in supervisor mode. */
9848 unallocated_encoding(s);
9849 return true;
9853 list = a->list;
9854 n = ctpop16(list);
9855 if (n < min_n || a->rn == 15) {
9856 unallocated_encoding(s);
9857 return true;
9860 addr = op_addr_block_pre(s, a, n);
9861 mem_idx = get_mem_index(s);
9863 for (i = j = 0; i < 16; i++) {
9864 if (!(list & (1 << i))) {
9865 continue;
9868 if (user && i != 15) {
9869 tmp = tcg_temp_new_i32();
9870 tmp2 = tcg_const_i32(i);
9871 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9872 tcg_temp_free_i32(tmp2);
9873 } else {
9874 tmp = load_reg(s, i);
9876 gen_aa32_st32(s, tmp, addr, mem_idx);
9877 tcg_temp_free_i32(tmp);
9879 /* No need to add after the last transfer. */
9880 if (++j != n) {
9881 tcg_gen_addi_i32(addr, addr, 4);
9885 op_addr_block_post(s, a, addr, n);
9886 return true;
9889 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
9891 /* BitCount(list) < 1 is UNPREDICTABLE */
9892 return op_stm(s, a, 1);
9895 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
9897 /* Writeback register in register list is UNPREDICTABLE for T32. */
9898 if (a->w && (a->list & (1 << a->rn))) {
9899 unallocated_encoding(s);
9900 return true;
9902 /* BitCount(list) < 2 is UNPREDICTABLE */
9903 return op_stm(s, a, 2);
9906 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
9908 int i, j, n, list, mem_idx;
9909 bool loaded_base;
9910 bool user = a->u;
9911 bool exc_return = false;
9912 TCGv_i32 addr, tmp, tmp2, loaded_var;
9914 if (user) {
9915 /* LDM (user), LDM (exception return) */
9916 if (IS_USER(s)) {
9917 /* Only usable in supervisor mode. */
9918 unallocated_encoding(s);
9919 return true;
9921 if (extract32(a->list, 15, 1)) {
9922 exc_return = true;
9923 user = false;
9924 } else {
9925 /* LDM (user) does not allow writeback. */
9926 if (a->w) {
9927 unallocated_encoding(s);
9928 return true;
9933 list = a->list;
9934 n = ctpop16(list);
9935 if (n < min_n || a->rn == 15) {
9936 unallocated_encoding(s);
9937 return true;
9940 addr = op_addr_block_pre(s, a, n);
9941 mem_idx = get_mem_index(s);
9942 loaded_base = false;
9943 loaded_var = NULL;
9945 for (i = j = 0; i < 16; i++) {
9946 if (!(list & (1 << i))) {
9947 continue;
9950 tmp = tcg_temp_new_i32();
9951 gen_aa32_ld32u(s, tmp, addr, mem_idx);
9952 if (user) {
9953 tmp2 = tcg_const_i32(i);
9954 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
9955 tcg_temp_free_i32(tmp2);
9956 tcg_temp_free_i32(tmp);
9957 } else if (i == a->rn) {
9958 loaded_var = tmp;
9959 loaded_base = true;
9960 } else if (i == 15 && exc_return) {
9961 store_pc_exc_ret(s, tmp);
9962 } else {
9963 store_reg_from_load(s, i, tmp);
9966 /* No need to add after the last transfer. */
9967 if (++j != n) {
9968 tcg_gen_addi_i32(addr, addr, 4);
9972 op_addr_block_post(s, a, addr, n);
9974 if (loaded_base) {
9975 /* Note that we reject base == pc above. */
9976 store_reg(s, a->rn, loaded_var);
9979 if (exc_return) {
9980 /* Restore CPSR from SPSR. */
9981 tmp = load_cpu_field(spsr);
9982 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9983 gen_io_start();
9985 gen_helper_cpsr_write_eret(cpu_env, tmp);
9986 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9987 gen_io_end();
9989 tcg_temp_free_i32(tmp);
9990 /* Must exit loop to check un-masked IRQs */
9991 s->base.is_jmp = DISAS_EXIT;
9993 return true;
9996 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
9999 * Writeback register in register list is UNPREDICTABLE
10000 * for ArchVersion() >= 7. Prior to v7, A32 would write
10001 * an UNKNOWN value to the base register.
10003 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10004 unallocated_encoding(s);
10005 return true;
10007 /* BitCount(list) < 1 is UNPREDICTABLE */
10008 return do_ldm(s, a, 1);
10011 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10013 /* Writeback register in register list is UNPREDICTABLE for T32. */
10014 if (a->w && (a->list & (1 << a->rn))) {
10015 unallocated_encoding(s);
10016 return true;
10018 /* BitCount(list) < 2 is UNPREDICTABLE */
10019 return do_ldm(s, a, 2);
10022 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10024 /* Writeback is conditional on the base register not being loaded. */
10025 a->w = !(a->list & (1 << a->rn));
10026 /* BitCount(list) < 1 is UNPREDICTABLE */
10027 return do_ldm(s, a, 1);
10031 * Branch, branch with link
10034 static bool trans_B(DisasContext *s, arg_i *a)
10036 gen_jmp(s, read_pc(s) + a->imm);
10037 return true;
10040 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10042 /* This has its cond from the encoding, and must be outside an IT block. */
10043 if (a->cond >= 0xe) {
10044 return false;
10046 if (s->condexec_mask) {
10047 unallocated_encoding(s);
10048 return true;
10050 arm_skip_unless(s, a->cond);
10051 gen_jmp(s, read_pc(s) + a->imm);
10052 return true;
10055 static bool trans_BL(DisasContext *s, arg_i *a)
10057 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10058 gen_jmp(s, read_pc(s) + a->imm);
10059 return true;
10062 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10064 TCGv_i32 tmp;
10066 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10067 if (s->thumb && (a->imm & 2)) {
10068 return false;
10070 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
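/*
 * BLX <imm> always switches instruction set: flip the Thumb flag and
 * compute the branch target from the word-aligned PC.
 */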
10071 tmp = tcg_const_i32(!s->thumb);
10072 store_cpu_field(tmp, thumb);
10073 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10074 return true;
10077 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10079 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10080 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10081 return true;
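/*
 * Thumb-1 splits BL/BLX into two 16-bit halves: the prefix above parks
 * PC + (imm << 12) in LR; the suffixes below add their own (imm << 1) to
 * LR to form the target, replace LR with the real return address, and
 * branch via gen_bx().
 */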
10084 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10086 TCGv_i32 tmp = tcg_temp_new_i32();
10088 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10089 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10090 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10091 gen_bx(s, tmp);
10092 return true;
10095 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10097 TCGv_i32 tmp;
10099 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10100 if (!ENABLE_ARCH_5) {
10101 return false;
10103 tmp = tcg_temp_new_i32();
10104 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10105 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10106 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10107 gen_bx(s, tmp);
10108 return true;
10111 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10113 TCGv_i32 addr, tmp;
10115 tmp = load_reg(s, a->rm);
10116 if (half) {
10117 tcg_gen_add_i32(tmp, tmp, tmp);
10119 addr = load_reg(s, a->rn);
10120 tcg_gen_add_i32(addr, addr, tmp);
10122 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10123 half ? MO_UW | s->be_data : MO_UB);
10124 tcg_temp_free_i32(addr);
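/*
 * The loaded table entry counts halfwords: double it and add the PC to
 * form the branch target (for TBH the index itself was scaled by 2 above).
 */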
10126 tcg_gen_add_i32(tmp, tmp, tmp);
10127 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10128 store_reg(s, 15, tmp);
10129 return true;
10132 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10134 return op_tbranch(s, a, false);
10137 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10139 return op_tbranch(s, a, true);
10142 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10144 TCGv_i32 tmp = load_reg(s, a->rn);
10146 arm_gen_condlabel(s);
10147 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10148 tmp, 0, s->condlabel);
10149 tcg_temp_free_i32(tmp);
10150 gen_jmp(s, read_pc(s) + a->imm);
10151 return true;
10155 * Supervisor call - both T32 & A32 come here so we need to check
10156 * which mode we are in when checking for semihosting.
10159 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10161 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10163 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10164 #ifndef CONFIG_USER_ONLY
10165 !IS_USER(s) &&
10166 #endif
10167 (a->imm == semihost_imm)) {
10168 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10169 } else {
10170 gen_set_pc_im(s, s->base.pc_next);
10171 s->svc_imm = a->imm;
10172 s->base.is_jmp = DISAS_SWI;
10174 return true;
10178 * Unconditional system instructions
10181 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10183 static const int8_t pre_offset[4] = {
10184 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10186 static const int8_t post_offset[4] = {
10187 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10189 TCGv_i32 addr, t1, t2;
10191 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10192 return false;
10194 if (IS_USER(s)) {
10195 unallocated_encoding(s);
10196 return true;
10199 addr = load_reg(s, a->rn);
10200 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10202 /* Load PC into t1 and CPSR into t2. */
10203 t1 = tcg_temp_new_i32();
10204 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10205 tcg_gen_addi_i32(addr, addr, 4);
10206 t2 = tcg_temp_new_i32();
10207 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10209 if (a->w) {
10210 /* Base writeback. */
10211 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10212 store_reg(s, a->rn, addr);
10213 } else {
10214 tcg_temp_free_i32(addr);
10216 gen_rfe(s, t1, t2);
10217 return true;
10220 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10222 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10223 return false;
10225 gen_srs(s, a->mode, a->pu, a->w);
10226 return true;
10229 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10231 uint32_t mask, val;
10233 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10234 return false;
10236 if (IS_USER(s)) {
10237 /* Implemented as NOP in user mode. */
10238 return true;
10240 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10242 mask = val = 0;
10243 if (a->imod & 2) {
10244 if (a->A) {
10245 mask |= CPSR_A;
10247 if (a->I) {
10248 mask |= CPSR_I;
10250 if (a->F) {
10251 mask |= CPSR_F;
10253 if (a->imod & 1) {
10254 val |= mask;
10257 if (a->M) {
10258 mask |= CPSR_M;
10259 val |= a->mode;
10261 if (mask) {
10262 gen_set_psr_im(s, mask, 0, val);
10264 return true;
10267 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10269 TCGv_i32 tmp, addr, el;
10271 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10272 return false;
10274 if (IS_USER(s)) {
10275 /* Implemented as NOP in user mode. */
10276 return true;
10279 tmp = tcg_const_i32(a->im);
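/*
 * a->im is the one-bit value being written; 19 and 16 below are the SYSm
 * register numbers for FAULTMASK and PRIMASK expected by gen_helper_v7m_msr().
 */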
10280 /* FAULTMASK */
10281 if (a->F) {
10282 addr = tcg_const_i32(19);
10283 gen_helper_v7m_msr(cpu_env, addr, tmp);
10284 tcg_temp_free_i32(addr);
10286 /* PRIMASK */
10287 if (a->I) {
10288 addr = tcg_const_i32(16);
10289 gen_helper_v7m_msr(cpu_env, addr, tmp);
10290 tcg_temp_free_i32(addr);
10292 el = tcg_const_i32(s->current_el);
10293 gen_helper_rebuild_hflags_m32(cpu_env, el);
10294 tcg_temp_free_i32(el);
10295 tcg_temp_free_i32(tmp);
10296 gen_lookup_tb(s);
10297 return true;
10301 * Clear-Exclusive, Barriers
10304 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10306 if (s->thumb
10307 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10308 : !ENABLE_ARCH_6K) {
10309 return false;
10311 gen_clrex(s);
10312 return true;
10315 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10317 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10318 return false;
10320 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10321 return true;
10324 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10326 return trans_DSB(s, NULL);
10329 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10331 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10332 return false;
10335 * We need to break the TB after this insn to execute
10336 * self-modifying code correctly and also to take
10337 * any pending interrupts immediately.
10339 gen_goto_tb(s, 0, s->base.pc_next);
10340 return true;
10343 static bool trans_SB(DisasContext *s, arg_SB *a)
10345 if (!dc_isar_feature(aa32_sb, s)) {
10346 return false;
10349 * TODO: There is no speculation barrier opcode
10350 * for TCG; MB and end the TB instead.
10352 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10353 gen_goto_tb(s, 0, s->base.pc_next);
10354 return true;
10357 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10359 if (!ENABLE_ARCH_6) {
10360 return false;
10362 if (a->E != (s->be_data == MO_BE)) {
10363 gen_helper_setend(cpu_env);
10364 s->base.is_jmp = DISAS_UPDATE;
10366 return true;
10370 * Preload instructions
10371 * All are nops, contingent on the appropriate arch level.
10374 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10376 return ENABLE_ARCH_5TE;
10379 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10381 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10384 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10386 return ENABLE_ARCH_7;
10390 * If-then
10393 static bool trans_IT(DisasContext *s, arg_IT *a)
10395 int cond_mask = a->cond_mask;
10398 * No actual code generated for this insn, just setup state.
10400 * Combinations of firstcond and mask which set up a 0b1111
10401 * condition are UNPREDICTABLE; we take the CONSTRAINED
10402 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10403 * i.e. both meaning "execute always".
10405 s->condexec_cond = (cond_mask >> 4) & 0xe;
10406 s->condexec_mask = cond_mask & 0x1f;
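/*
 * Only the top three bits of firstcond are kept in condexec_cond; the low
 * bit travels in condexec_mask and is folded back in for each insn as the
 * block advances, which is how the T/E pattern flips the condition.
 */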
10407 return true;
10411 * Legacy decoder.
10414 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10416 unsigned int cond = insn >> 28;
10418 /* M variants do not implement ARM mode; this must raise the INVSTATE
10419 * UsageFault exception.
10421 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10422 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10423 default_exception_el(s));
10424 return;
10427 if (cond == 0xf) {
10428 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10429 * choose to UNDEF. In ARMv5 and above the space is used
10430 * for miscellaneous unconditional instructions.
10432 ARCH(5);
10434 /* Unconditional instructions. */
10435 /* TODO: Perhaps merge these into one decodetree output file. */
10436 if (disas_a32_uncond(s, insn) ||
10437 disas_vfp_uncond(s, insn) ||
10438 disas_neon_dp(s, insn) ||
10439 disas_neon_ls(s, insn) ||
10440 disas_neon_shared(s, insn)) {
10441 return;
10443 /* fall back to legacy decoder */
10445 if (((insn >> 25) & 7) == 1) {
10446 /* NEON Data processing. */
10447 if (disas_neon_data_insn(s, insn)) {
10448 goto illegal_op;
10450 return;
10452 if ((insn & 0x0e000f00) == 0x0c000100) {
10453 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10454 /* iWMMXt register transfer. */
10455 if (extract32(s->c15_cpar, 1, 1)) {
10456 if (!disas_iwmmxt_insn(s, insn)) {
10457 return;
10462 goto illegal_op;
10464 if (cond != 0xe) {
10465 /* If not always-execute, generate a conditional jump to the
10466 next instruction. */
10467 arm_skip_unless(s, cond);
10470 /* TODO: Perhaps merge these into one decodetree output file. */
10471 if (disas_a32(s, insn) ||
10472 disas_vfp(s, insn)) {
10473 return;
10475 /* fall back to legacy decoder */
10477 switch ((insn >> 24) & 0xf) {
10478 case 0xc:
10479 case 0xd:
10480 case 0xe:
10481 if (((insn >> 8) & 0xe) == 10) {
10482 /* VFP, but failed disas_vfp. */
10483 goto illegal_op;
10485 if (disas_coproc_insn(s, insn)) {
10486 /* Coprocessor. */
10487 goto illegal_op;
10489 break;
10490 default:
10491 illegal_op:
10492 unallocated_encoding(s);
10493 break;
10497 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10500 * Return true if this is a 16 bit instruction. We must be precise
10501 * about this (matching the decode).
10503 if ((insn >> 11) < 0x1d) {
10504 /* Definitely a 16-bit instruction */
10505 return true;
10508 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10509 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10510 * end up actually treating this as two 16-bit insns, though,
10511 * if it's half of a bl/blx pair that might span a page boundary.
10513 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10514 arm_dc_feature(s, ARM_FEATURE_M)) {
10515 /* Thumb2 cores (including all M profile ones) always treat
10516 * 32-bit insns as 32-bit.
10518 return false;
10521 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10522 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10523 * is not on the next page; we merge this into a 32-bit
10524 * insn.
10526 return false;
10528 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10529 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10530 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10531 * -- handle as single 16 bit insn
10533 return true;
10536 /* Translate a 32-bit thumb instruction. */
10537 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10540 * ARMv6-M supports a limited subset of Thumb2 instructions.
10541 * Other Thumb1 architectures allow only 32-bit
10542 * combined BL/BLX prefix and suffix.
10544 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10545 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10546 int i;
10547 bool found = false;
10548 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10549 0xf3b08040 /* dsb */,
10550 0xf3b08050 /* dmb */,
10551 0xf3b08060 /* isb */,
10552 0xf3e08000 /* mrs */,
10553 0xf000d000 /* bl */};
10554 static const uint32_t armv6m_mask[] = {0xffe0d000,
10555 0xfff0d0f0,
10556 0xfff0d0f0,
10557 0xfff0d0f0,
10558 0xffe0d000,
10559 0xf800d000};
10561 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10562 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10563 found = true;
10564 break;
10567 if (!found) {
10568 goto illegal_op;
10570 } else if ((insn & 0xf800e800) != 0xf000e800) {
10571 ARCH(6T2);
10574 if ((insn & 0xef000000) == 0xef000000) {
10576 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10577 * transform into
10578 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10580 uint32_t a32_insn = (insn & 0xe2ffffff) |
10581 ((insn & (1 << 28)) >> 4) | (1 << 28);
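/*
 * i.e. move the T32 'p' bit from bit 28 down to bit 24 and force the top
 * nibble to 0b1111, giving the equivalent unconditional A32 encoding.
 */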
10583 if (disas_neon_dp(s, a32_insn)) {
10584 return;
10588 if ((insn & 0xff100000) == 0xf9000000) {
10590 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10591 * transform into
10592 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10594 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
10596 if (disas_neon_ls(s, a32_insn)) {
10597 return;
10602 * TODO: Perhaps merge these into one decodetree output file.
10603 * Note disas_vfp is written for a32 with cond field in the
10604 * top nibble. The t32 encoding requires 0xe in the top nibble.
10606 if (disas_t32(s, insn) ||
10607 disas_vfp_uncond(s, insn) ||
10608 disas_neon_shared(s, insn) ||
10609 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10610 return;
10612 /* fall back to legacy decoder */
10614 switch ((insn >> 25) & 0xf) {
10615 case 0: case 1: case 2: case 3:
10616 /* 16-bit instructions. Should never happen. */
10617 abort();
10618 case 6: case 7: case 14: case 15:
10619 /* Coprocessor. */
10620 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10621 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10622 if (extract32(insn, 24, 2) == 3) {
10623 goto illegal_op; /* op0 = 0b11 : unallocated */
10626 if (((insn >> 8) & 0xe) == 10 &&
10627 dc_isar_feature(aa32_fpsp_v2, s)) {
10628 /* FP, and the CPU supports it */
10629 goto illegal_op;
10630 } else {
10631 /* All other insns: NOCP */
10632 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10633 syn_uncategorized(),
10634 default_exception_el(s));
10636 break;
10638 if (((insn >> 24) & 3) == 3) {
10639 /* Translate into the equivalent ARM encoding. */
10640 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10641 if (disas_neon_data_insn(s, insn)) {
10642 goto illegal_op;
10644 } else if (((insn >> 8) & 0xe) == 10) {
10645 /* VFP, but failed disas_vfp. */
10646 goto illegal_op;
10647 } else {
10648 if (insn & (1 << 28))
10649 goto illegal_op;
10650 if (disas_coproc_insn(s, insn)) {
10651 goto illegal_op;
10654 break;
10655 case 12:
10656 goto illegal_op;
10657 default:
10658 illegal_op:
10659 unallocated_encoding(s);
10663 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10665 if (!disas_t16(s, insn)) {
10666 unallocated_encoding(s);
10670 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10672 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10673 * (False positives are OK, false negatives are not.)
10674 * We know this is a Thumb insn, and our caller ensures we are
10675 * only called if dc->base.pc_next is less than 4 bytes from the page
10676 * boundary, so we cross the page if the first 16 bits indicate
10677 * that this is a 32 bit insn.
10679 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10681 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10684 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10686 DisasContext *dc = container_of(dcbase, DisasContext, base);
10687 CPUARMState *env = cs->env_ptr;
10688 ARMCPU *cpu = env_archcpu(env);
10689 uint32_t tb_flags = dc->base.tb->flags;
10690 uint32_t condexec, core_mmu_idx;
10692 dc->isar = &cpu->isar;
10693 dc->condjmp = 0;
10695 dc->aarch64 = 0;
10696 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10697 * there is no secure EL1, so we route exceptions to EL3.
10699 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10700 !arm_el_is_aa64(env, 3);
10701 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10702 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10703 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10704 dc->condexec_mask = (condexec & 0xf) << 1;
10705 dc->condexec_cond = condexec >> 4;
10707 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10708 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10709 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10710 #if !defined(CONFIG_USER_ONLY)
10711 dc->user = (dc->current_el == 0);
10712 #endif
10713 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10715 if (arm_feature(env, ARM_FEATURE_M)) {
10716 dc->vfp_enabled = 1;
10717 dc->be_data = MO_TE;
10718 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10719 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10720 regime_is_secure(env, dc->mmu_idx);
10721 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10722 dc->v8m_fpccr_s_wrong =
10723 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10724 dc->v7m_new_fp_ctxt_needed =
10725 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10726 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10727 } else {
10728 dc->be_data =
10729 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10730 dc->debug_target_el =
10731 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10732 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10733 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10734 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10735 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10736 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10737 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10738 } else {
10739 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10740 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10743 dc->cp_regs = cpu->cp_regs;
10744 dc->features = env->features;
10746 /* Single step state. The code-generation logic here is:
10747 * SS_ACTIVE == 0:
10748 * generate code with no special handling for single-stepping (except
10749 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10750 * this happens anyway because those changes are all system register or
10751 * PSTATE writes).
10752 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10753 * emit code for one insn
10754 * emit code to clear PSTATE.SS
10755 * emit code to generate software step exception for completed step
10756 * end TB (as usual for having generated an exception)
10757 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10758 * emit code to generate a software step exception
10759 * end the TB
10761 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10762 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10763 dc->is_ldex = false;
10765 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10767 /* If architectural single step active, limit to 1. */
10768 if (is_singlestepping(dc)) {
10769 dc->base.max_insns = 1;
10772 /* ARM is a fixed-length ISA. Bound the number of insns to execute
10773 to those left on the page. */
10774 if (!dc->thumb) {
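/*
 * -(pc | TARGET_PAGE_MASK) is the number of bytes left on the current
 * page (TARGET_PAGE_MASK has all of the high bits set), so dividing by
 * 4 gives the number of remaining 4-byte A32 insn slots.  For example,
 * assuming 4 KiB pages, a pc_first at page offset 0xff8 gives a bound
 * of (0x1000 - 0xff8) / 4 = 2.
 */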
10775 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10776 dc->base.max_insns = MIN(dc->base.max_insns, bound);
10779 cpu_V0 = tcg_temp_new_i64();
10780 cpu_V1 = tcg_temp_new_i64();
10781 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
10782 cpu_M0 = tcg_temp_new_i64();
10785 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
10787 DisasContext *dc = container_of(dcbase, DisasContext, base);
10789 /* A note on handling of the condexec (IT) bits:
10791 * We want to avoid the overhead of having to write the updated condexec
10792 * bits back to the CPUARMState for every instruction in an IT block. So:
10793 * (1) if the condexec bits are not already zero then we write
10794 * zero back into the CPUARMState now. This avoids complications trying
10795 * to do it at the end of the block. (For example if we don't do this
10796 * it's hard to identify whether we can safely skip writing condexec
10797 * at the end of the TB, which we definitely want to do for the case
10798 * where a TB doesn't do anything with the IT state at all.)
10799 * (2) if we are going to leave the TB then we call gen_set_condexec()
10800 * which will write the correct value into CPUARMState if zero is wrong.
10801 * This is done both for leaving the TB at the end, and for leaving
10802 * it because of an exception we know will happen, which is done in
10803 * gen_exception_insn(). The latter is necessary because we need to
10804 * leave the TB with the PC/IT state just prior to execution of the
10805 * instruction which caused the exception.
10806 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
10807 * then the CPUARMState will be wrong and we need to reset it.
10808 * This is handled in the same way as restoration of the
10809 * PC in these situations; we save the value of the condexec bits
10810 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10811 * then uses this to restore them after an exception.
10813 * Note that there are no instructions which can read the condexec
10814 * bits, and none which can write non-static values to them, so
10815 * we don't need to care about whether CPUARMState is correct in the
10816 * middle of a TB.
10819 /* Reset the conditional execution bits immediately. This avoids
10820 complications trying to do it at the end of the block. */
10821 if (dc->condexec_mask || dc->condexec_cond) {
10822 TCGv_i32 tmp = tcg_temp_new_i32();
10823 tcg_gen_movi_i32(tmp, 0);
10824 store_cpu_field(tmp, condexec_bits);
10828 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10830 DisasContext *dc = container_of(dcbase, DisasContext, base);
10832 tcg_gen_insn_start(dc->base.pc_next,
10833 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
10835 dc->insn_start = tcg_last_op();
10838 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
10839 const CPUBreakpoint *bp)
10841 DisasContext *dc = container_of(dcbase, DisasContext, base);
10843 if (bp->flags & BP_CPU) {
10844 gen_set_condexec(dc);
10845 gen_set_pc_im(dc, dc->base.pc_next);
10846 gen_helper_check_breakpoints(cpu_env);
10847 /* End the TB early; it's likely not going to be executed */
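/* CPU breakpoints (unlike gdbstub ones) can depend on run-time state
 * such as the current EL or security state, so we re-check them via
 * the helper at execution time; if none actually fires, execution
 * simply falls through to the insn at this PC in a new TB.
 */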
10848 dc->base.is_jmp = DISAS_TOO_MANY;
10849 } else {
10850 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10851 /* The address covered by the breakpoint must be
10852 included in [tb->pc, tb->pc + tb->size) in order
10853 for it to be properly cleared -- thus we
10854 increment the PC here so that the logic setting
10855 tb->size below does the right thing. */
10856 /* TODO: Advance PC by correct instruction length to
10857 * avoid disassembler error messages */
10858 dc->base.pc_next += 2;
10859 dc->base.is_jmp = DISAS_NORETURN;
10862 return true;
10865 static bool arm_pre_translate_insn(DisasContext *dc)
10867 #ifdef CONFIG_USER_ONLY
10868 /* Intercept jump to the magic kernel page. */
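/* (0xffff0000 upwards is the Linux high-vectors/kuser-helper page; the
 * EXCP_KERNEL_TRAP raised here is how the linux-user front end gets to
 * emulate those helpers.)
 */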
10869 if (dc->base.pc_next >= 0xffff0000) {
10870 /* We always get here via a jump, so know we are not in a
10871 conditional execution block. */
10872 gen_exception_internal(EXCP_KERNEL_TRAP);
10873 dc->base.is_jmp = DISAS_NORETURN;
10874 return true;
10876 #endif
10878 if (dc->ss_active && !dc->pstate_ss) {
10879 /* Singlestep state is Active-pending.
10880 * If we're in this state at the start of a TB then either
10881 * a) we just took an exception to an EL which is being debugged
10882 * and this is the first insn in the exception handler
10883 * b) debug exceptions were masked and we just unmasked them
10884 * without changing EL (eg by clearing PSTATE.D)
10885 * In either case we're going to take a swstep exception in the
10886 * "did not step an insn" case, and so the syndrome ISV and EX
10887 * bits should be zero.
10889 assert(dc->base.num_insns == 1);
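/* The two zero arguments are the syndrome ISV and EX bits mentioned above. */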
10890 gen_swstep_exception(dc, 0, 0);
10891 dc->base.is_jmp = DISAS_NORETURN;
10892 return true;
10895 return false;
10898 static void arm_post_translate_insn(DisasContext *dc)
10900 if (dc->condjmp && !dc->base.is_jmp) {
10901 gen_set_label(dc->condlabel);
10902 dc->condjmp = 0;
10904 translator_loop_temp_check(&dc->base);
10907 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10909 DisasContext *dc = container_of(dcbase, DisasContext, base);
10910 CPUARMState *env = cpu->env_ptr;
10911 unsigned int insn;
10913 if (arm_pre_translate_insn(dc)) {
10914 return;
10917 dc->pc_curr = dc->base.pc_next;
10918 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
10919 dc->insn = insn;
10920 dc->base.pc_next += 4;
10921 disas_arm_insn(dc, insn);
10923 arm_post_translate_insn(dc);
10925 /* ARM is a fixed-length ISA. We performed the cross-page check
10926 in init_disas_context by adjusting max_insns. */
10929 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
10931 /* Return true if this Thumb insn is always unconditional,
10932 * even inside an IT block. This is true of only a very few
10933 * instructions: BKPT, HLT, and SG.
10935 * A larger class of instructions are UNPREDICTABLE if used
10936 * inside an IT block; we do not need to detect those here, because
10937 * what we do by default (perform the cc check and update the IT
10938 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
10939 * choice for those situations.
10941 * insn is either a 16-bit or a 32-bit instruction; the two are
10942 * distinguishable because for the 16-bit case the top 16 bits
10943 * are zeroes, and that isn't a valid 32-bit encoding.
10945 if ((insn & 0xffffff00) == 0xbe00) {
10946 /* BKPT */
10947 return true;
10950 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
10951 !arm_dc_feature(s, ARM_FEATURE_M)) {
10952 /* HLT: v8A only. This is unconditional even when it is going to
10953 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
10954 * For v7 cores this was a plain old undefined encoding and so
10955 * honours its cc check. (We might be using the encoding as
10956 * a semihosting trap, but we don't change the cc check behaviour
10957 * on that account, because a debugger connected to a real v7A
10958 * core and emulating semihosting traps by catching the UNDEF
10959 * exception would also only see cases where the cc check passed.
10960 * No guest code should be trying to do a HLT semihosting trap
10961 * in an IT block anyway.
10963 return true;
10966 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
10967 arm_dc_feature(s, ARM_FEATURE_M)) {
10968 /* SG: v8M only */
10969 return true;
10972 return false;
10975 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10977 DisasContext *dc = container_of(dcbase, DisasContext, base);
10978 CPUARMState *env = cpu->env_ptr;
10979 uint32_t insn;
10980 bool is_16bit;
10982 if (arm_pre_translate_insn(dc)) {
10983 return;
10986 dc->pc_curr = dc->base.pc_next;
10987 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10988 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
10989 dc->base.pc_next += 2;
10990 if (!is_16bit) {
10991 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10993 insn = insn << 16 | insn2;
10994 dc->base.pc_next += 2;
10996 dc->insn = insn;
10998 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
10999 uint32_t cond = dc->condexec_cond;
11002 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11003 * "always"; 0xf is not "never".
11005 if (cond < 0x0e) {
11006 arm_skip_unless(dc, cond);
11010 if (is_16bit) {
11011 disas_thumb_insn(dc, insn);
11012 } else {
11013 disas_thumb2_insn(dc, insn);
11016 /* Advance the Thumb condexec condition. */
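/*
 * This mirrors the architectural ITAdvance(): the low bit of the
 * condition is replaced by the next bit of the mask and the mask is
 * shifted left; once the mask runs out the IT block is over and the
 * state is cleared.  For a single-insn IT block, for example, the
 * internal mask starts as 0b10000 and becomes zero after this one
 * advance.
 */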
11017 if (dc->condexec_mask) {
11018 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11019 ((dc->condexec_mask >> 4) & 1));
11020 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11021 if (dc->condexec_mask == 0) {
11022 dc->condexec_cond = 0;
11026 arm_post_translate_insn(dc);
11028 /* Thumb is a variable-length ISA. Stop translation when the next insn
11029 * will touch a new page. This ensures that prefetch aborts occur at
11030 * the right place.
11032 * We want to stop the TB if the next insn starts in a new page,
11033 * or if it spans between this page and the next. This means that
11034 * if we're looking at the last halfword in the page we need to
11035 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11036 * or a 32-bit Thumb insn (which won't).
11037 * This is to avoid generating a silly TB with a single 16-bit insn
11038 * in it at the end of this page (which would execute correctly
11039 * but isn't very efficient).
11041 if (dc->base.is_jmp == DISAS_NEXT
11042 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11043 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11044 && insn_crosses_page(env, dc)))) {
11045 dc->base.is_jmp = DISAS_TOO_MANY;
11049 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11051 DisasContext *dc = container_of(dcbase, DisasContext, base);
11053 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11054 /* FIXME: This can theoretically happen with self-modifying code. */
11055 cpu_abort(cpu, "IO on conditional branch instruction");
11058 /* At this stage dc->condjmp will only be set when the skipped
11059 instruction was a conditional branch or trap, and the PC has
11060 already been written. */
11061 gen_set_condexec(dc);
11062 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11063 /* Exception return branches need some special case code at the
11064 * end of the TB, which is complex enough that it has to
11065 * handle the single-step vs not and the condition-failed
11066 * insn codepath itself.
11068 gen_bx_excret_final_code(dc);
11069 } else if (unlikely(is_singlestepping(dc))) {
11070 /* Unconditional and "condition passed" instruction codepath. */
11071 switch (dc->base.is_jmp) {
11072 case DISAS_SWI:
11073 gen_ss_advance(dc);
11074 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11075 default_exception_el(dc));
11076 break;
11077 case DISAS_HVC:
11078 gen_ss_advance(dc);
11079 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11080 break;
11081 case DISAS_SMC:
11082 gen_ss_advance(dc);
11083 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11084 break;
11085 case DISAS_NEXT:
11086 case DISAS_TOO_MANY:
11087 case DISAS_UPDATE:
11088 gen_set_pc_im(dc, dc->base.pc_next);
11089 /* fall through */
11090 default:
11091 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11092 gen_singlestep_exception(dc);
11093 break;
11094 case DISAS_NORETURN:
11095 break;
11097 } else {
11098 /* While branches must always occur at the end of an IT block,
11099 there are a few other things that can cause us to terminate
11100 the TB in the middle of an IT block:
11101 - Exception generating instructions (bkpt, swi, undefined).
11102 - Page boundaries.
11103 - Hardware watchpoints.
11104 Hardware breakpoints have already been handled and skip this code.
11106 switch (dc->base.is_jmp) {
11107 case DISAS_NEXT:
11108 case DISAS_TOO_MANY:
11109 gen_goto_tb(dc, 1, dc->base.pc_next);
11110 break;
11111 case DISAS_JUMP:
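/* The PC has already been written, so chain to the next TB via a
 * TB-hash lookup (gen_goto_ptr() emits a lookup-and-goto-ptr op)
 * rather than returning to the main loop.
 */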
11112 gen_goto_ptr();
11113 break;
11114 case DISAS_UPDATE:
11115 gen_set_pc_im(dc, dc->base.pc_next);
11116 /* fall through */
11117 default:
11118 /* indicate that the hash table must be used to find the next TB */
11119 tcg_gen_exit_tb(NULL, 0);
11120 break;
11121 case DISAS_NORETURN:
11122 /* nothing more to generate */
11123 break;
11124 case DISAS_WFI:
11126 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11127 !(dc->insn & (1U << 31))) ? 2 : 4);
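/*
 * tmp is the insn length: a 16-bit Thumb insn leaves the top half of
 * dc->insn zero, so bit 31 clear means a 2-byte encoding and anything
 * else is 4 bytes.  The helper uses this, for instance, to wind the PC
 * back onto the WFI itself before raising a trap to a higher
 * exception level.
 */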
11129 gen_helper_wfi(cpu_env, tmp);
11130 tcg_temp_free_i32(tmp);
11131 /* The helper doesn't necessarily throw an exception, but we
11132 * must go back to the main loop to check for interrupts anyway.
11134 tcg_gen_exit_tb(NULL, 0);
11135 break;
11137 case DISAS_WFE:
11138 gen_helper_wfe(cpu_env);
11139 break;
11140 case DISAS_YIELD:
11141 gen_helper_yield(cpu_env);
11142 break;
11143 case DISAS_SWI:
11144 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11145 default_exception_el(dc));
11146 break;
11147 case DISAS_HVC:
11148 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11149 break;
11150 case DISAS_SMC:
11151 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11152 break;
11156 if (dc->condjmp) {
11157 /* "Condition failed" instruction codepath for the branch/trap insn */
11158 gen_set_label(dc->condlabel);
11159 gen_set_condexec(dc);
11160 if (unlikely(is_singlestepping(dc))) {
11161 gen_set_pc_im(dc, dc->base.pc_next);
11162 gen_singlestep_exception(dc);
11163 } else {
11164 gen_goto_tb(dc, 1, dc->base.pc_next);
11169 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11171 DisasContext *dc = container_of(dcbase, DisasContext, base);
11173 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11174 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11177 static const TranslatorOps arm_translator_ops = {
11178 .init_disas_context = arm_tr_init_disas_context,
11179 .tb_start = arm_tr_tb_start,
11180 .insn_start = arm_tr_insn_start,
11181 .breakpoint_check = arm_tr_breakpoint_check,
11182 .translate_insn = arm_tr_translate_insn,
11183 .tb_stop = arm_tr_tb_stop,
11184 .disas_log = arm_tr_disas_log,
11187 static const TranslatorOps thumb_translator_ops = {
11188 .init_disas_context = arm_tr_init_disas_context,
11189 .tb_start = arm_tr_tb_start,
11190 .insn_start = arm_tr_insn_start,
11191 .breakpoint_check = arm_tr_breakpoint_check,
11192 .translate_insn = thumb_tr_translate_insn,
11193 .tb_stop = arm_tr_tb_stop,
11194 .disas_log = arm_tr_disas_log,
11197 /* generate intermediate code for basic block 'tb'. */
11198 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11200 DisasContext dc = { };
11201 const TranslatorOps *ops = &arm_translator_ops;
11203 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11204 ops = &thumb_translator_ops;
11206 #ifdef TARGET_AARCH64
11207 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11208 ops = &aarch64_translator_ops;
11210 #endif
11212 translator_loop(ops, &dc.base, cpu, tb, max_insns);
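/*
 * The three insn-start words recorded by arm_tr_insn_start() above are
 * what restore_state_to_opc() consumes: data[0] is the PC, data[1] the
 * packed condexec/IT bits, and data[2] the exception syndrome shifted
 * down by ARM_INSN_START_WORD2_SHIFT.
 */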
11215 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11216 target_ulong *data)
11218 if (is_a64(env)) {
11219 env->pc = data[0];
11220 env->condexec_bits = 0;
11221 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11222 } else {
11223 env->regs[15] = data[0];
11224 env->condexec_bits = data[1];
11225 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;