2 * ARM translation: AArch32 Neon instructions
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 * Copyright (c) 2020 Linaro, Ltd.
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
24 * This file is intended to be included from translate.c; it uses
25 * some macros and definitions provided by that file.
26 * It might be possible to convert it to a standalone .c file eventually.
static inline int plus1(DisasContext *s, int x) { return x + 1; }
static inline int rsub_64(DisasContext *s, int x) { return 64 - x; }
static inline int rsub_32(DisasContext *s, int x) { return 32 - x; }
static inline int rsub_16(DisasContext *s, int x) { return 16 - x; }
static inline int rsub_8(DisasContext *s, int x) { return 8 - x; }
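/*
 * These trivial helpers exist for the benefit of the generated decoder:
 * the .decode files reference them via !function transforms, e.g. using
 * rsub_N to recover a right-shift amount that the instruction word
 * encodes as N - shift.
 */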
52 /* Include the generated Neon decoder */
53 #include "decode-neon-dp.c.inc"
54 #include "decode-neon-ls.c.inc"
55 #include "decode-neon-shared.c.inc"
57 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
58 * where 0 is the least significant end of the register.
61 neon_element_offset(int reg, int element, MemOp size)
63 int element_size = 1 << size;
64 int ofs = element * element_size;
65 #ifdef HOST_WORDS_BIGENDIAN
66 /* Calculate the offset assuming fully little-endian,
67 * then XOR to account for the order of the 8-byte units.
69 if (element_size < 8) {
70 ofs ^= 8 - element_size;
73 return neon_reg_offset(reg, 0) + ofs;
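/*
 * For example, element 0 of a MO_16 access on a big-endian host ends up
 * at offset 0 ^ (8 - 2) == 6, which is where the least significant
 * 16 bits of the 64-bit unit live in host byte order.
 */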
76 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
78 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
82 tcg_gen_ld8u_i32(var, cpu_env, offset);
85 tcg_gen_ld16u_i32(var, cpu_env, offset);
88 tcg_gen_ld_i32(var, cpu_env, offset);
91 g_assert_not_reached();
95 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
97 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
101 tcg_gen_ld8u_i64(var, cpu_env, offset);
104 tcg_gen_ld16u_i64(var, cpu_env, offset);
107 tcg_gen_ld32u_i64(var, cpu_env, offset);
110 tcg_gen_ld_i64(var, cpu_env, offset);
113 g_assert_not_reached();
117 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
119 long offset = neon_element_offset(reg, ele, size);
123 tcg_gen_st8_i32(var, cpu_env, offset);
126 tcg_gen_st16_i32(var, cpu_env, offset);
129 tcg_gen_st_i32(var, cpu_env, offset);
132 g_assert_not_reached();
136 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
138 long offset = neon_element_offset(reg, ele, size);
142 tcg_gen_st8_i64(var, cpu_env, offset);
145 tcg_gen_st16_i64(var, cpu_env, offset);
148 tcg_gen_st32_i64(var, cpu_env, offset);
151 tcg_gen_st_i64(var, cpu_env, offset);
154 g_assert_not_reached();
158 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
162 gen_helper_gvec_3_ptr *fn_gvec_ptr;
164 if (!dc_isar_feature(aa32_vcma, s)
165 || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
169 /* UNDEF accesses to D16-D31 if they don't exist. */
170 if (!dc_isar_feature(aa32_simd_r32, s) &&
171 ((a->vd | a->vn | a->vm) & 0x10)) {
175 if ((a->vn | a->vm | a->vd) & a->q) {
179 if (!vfp_access_check(s)) {
183 opr_sz = (1 + a->q) * 8;
184 fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
185 fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
186 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
187 vfp_reg_offset(1, a->vn),
188 vfp_reg_offset(1, a->vm),
189 fpst, opr_sz, opr_sz, a->rot,
191 tcg_temp_free_ptr(fpst);
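/*
 * Note the ordering of checks used here and in most trans functions
 * below: ISA feature checks first, then the UNDEF checks (D16-D31
 * accesses, Q-register alignment), then vfp_access_check(), which may
 * raise an exception for disabled FP/SIMD, and only then code generation.
 */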
195 static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
199 gen_helper_gvec_3_ptr *fn_gvec_ptr;
201 if (!dc_isar_feature(aa32_vcma, s)
202 || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
206 /* UNDEF accesses to D16-D31 if they don't exist. */
207 if (!dc_isar_feature(aa32_simd_r32, s) &&
208 ((a->vd | a->vn | a->vm) & 0x10)) {
212 if ((a->vn | a->vm | a->vd) & a->q) {
216 if (!vfp_access_check(s)) {
220 opr_sz = (1 + a->q) * 8;
221 fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
222 fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
223 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
224 vfp_reg_offset(1, a->vn),
225 vfp_reg_offset(1, a->vm),
226 fpst, opr_sz, opr_sz, a->rot,
228 tcg_temp_free_ptr(fpst);
232 static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
235 gen_helper_gvec_3 *fn_gvec;
237 if (!dc_isar_feature(aa32_dp, s)) {
241 /* UNDEF accesses to D16-D31 if they don't exist. */
242 if (!dc_isar_feature(aa32_simd_r32, s) &&
243 ((a->vd | a->vn | a->vm) & 0x10)) {
247 if ((a->vn | a->vm | a->vd) & a->q) {
251 if (!vfp_access_check(s)) {
255 opr_sz = (1 + a->q) * 8;
256 fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
257 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
258 vfp_reg_offset(1, a->vn),
259 vfp_reg_offset(1, a->vm),
260 opr_sz, opr_sz, 0, fn_gvec);
264 static bool trans_VFML(DisasContext *s, arg_VFML *a)
268 if (!dc_isar_feature(aa32_fhm, s)) {
272 /* UNDEF accesses to D16-D31 if they don't exist. */
273 if (!dc_isar_feature(aa32_simd_r32, s) &&
282 if (!vfp_access_check(s)) {
286 opr_sz = (1 + a->q) * 8;
287 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
288 vfp_reg_offset(a->q, a->vn),
289 vfp_reg_offset(a->q, a->vm),
290 cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
291 gen_helper_gvec_fmlal_a32);
295 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
297 gen_helper_gvec_3_ptr *fn_gvec_ptr;
301 if (!dc_isar_feature(aa32_vcma, s)) {
304 if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
308 /* UNDEF accesses to D16-D31 if they don't exist. */
309 if (!dc_isar_feature(aa32_simd_r32, s) &&
310 ((a->vd | a->vn | a->vm) & 0x10)) {
314 if ((a->vd | a->vn) & a->q) {
318 if (!vfp_access_check(s)) {
322 fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
323 : gen_helper_gvec_fcmlah_idx);
324 opr_sz = (1 + a->q) * 8;
325 fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
326 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
327 vfp_reg_offset(1, a->vn),
328 vfp_reg_offset(1, a->vm),
329 fpst, opr_sz, opr_sz,
330 (a->index << 2) | a->rot, fn_gvec_ptr);
331 tcg_temp_free_ptr(fpst);
335 static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
337 gen_helper_gvec_3 *fn_gvec;
341 if (!dc_isar_feature(aa32_dp, s)) {
345 /* UNDEF accesses to D16-D31 if they don't exist. */
346 if (!dc_isar_feature(aa32_simd_r32, s) &&
347 ((a->vd | a->vn) & 0x10)) {
351 if ((a->vd | a->vn) & a->q) {
355 if (!vfp_access_check(s)) {
359 fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
360 opr_sz = (1 + a->q) * 8;
361 fpst = fpstatus_ptr(FPST_STD);
362 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
363 vfp_reg_offset(1, a->vn),
364 vfp_reg_offset(1, a->rm),
365 opr_sz, opr_sz, a->index, fn_gvec);
366 tcg_temp_free_ptr(fpst);
370 static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
374 if (!dc_isar_feature(aa32_fhm, s)) {
378 /* UNDEF accesses to D16-D31 if they don't exist. */
379 if (!dc_isar_feature(aa32_simd_r32, s) &&
380 ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
388 if (!vfp_access_check(s)) {
392 opr_sz = (1 + a->q) * 8;
393 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
394 vfp_reg_offset(a->q, a->vn),
395 vfp_reg_offset(a->q, a->rm),
396 cpu_env, opr_sz, opr_sz,
397 (a->index << 2) | a->s, /* is_2 == 0 */
398 gen_helper_gvec_fmlal_idx_a32);
static struct {
    int nregs, interleave, spacing;
} const neon_ls_element_type[11] = {
420 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
426 base = load_reg(s, rn);
428 tcg_gen_addi_i32(base, base, stride);
431 index = load_reg(s, rm);
432 tcg_gen_add_i32(base, base, index);
433 tcg_temp_free_i32(index);
435 store_reg(s, rn, base);
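/*
 * Writeback follows the architected post-indexing for VLDn/VSTn:
 * Rm == 15 means no writeback, Rm == 13 means writeback of the
 * transfer size (the 'stride' argument), and any other Rm is a
 * register post-index.
 */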
439 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
441 /* Neon load/store multiple structures */
442 int nregs, interleave, spacing, reg, n;
443 MemOp endian = s->be_data;
444 int mmu_idx = get_mem_index(s);
449 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
453 /* UNDEF accesses to D16-D31 if they don't exist */
454 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
460 /* Catch UNDEF cases for bad values of align field */
461 switch (a->itype & 0xc) {
475 nregs = neon_ls_element_type[a->itype].nregs;
476 interleave = neon_ls_element_type[a->itype].interleave;
477 spacing = neon_ls_element_type[a->itype].spacing;
478 if (size == 3 && (interleave | spacing) != 1) {
482 if (!vfp_access_check(s)) {
486 /* For our purposes, bytes are always little-endian. */
491 * Consecutive little-endian elements from a single register
492 * can be promoted to a larger little-endian operation.
494 if (interleave == 1 && endian == MO_LE) {
497 tmp64 = tcg_temp_new_i64();
498 addr = tcg_temp_new_i32();
499 tmp = tcg_const_i32(1 << size);
500 load_reg_var(s, addr, a->rn);
501 for (reg = 0; reg < nregs; reg++) {
502 for (n = 0; n < 8 >> size; n++) {
504 for (xs = 0; xs < interleave; xs++) {
505 int tt = a->vd + reg + spacing * xs;
508 gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
509 neon_store_element64(tt, n, size, tmp64);
511 neon_load_element64(tmp64, tt, n, size);
512 gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
514 tcg_gen_add_i32(addr, addr, tmp);
518 tcg_temp_free_i32(addr);
519 tcg_temp_free_i32(tmp);
520 tcg_temp_free_i64(tmp64);
522 gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
526 static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
528 /* Neon load single structure to all lanes */
529 int reg, stride, vec_size;
532 int nregs = a->n + 1;
535 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
539 /* UNDEF accesses to D16-D31 if they don't exist */
540 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
545 if (nregs != 4 || a->a == 0) {
548 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
551 if (nregs == 1 && a->a == 1 && size == 0) {
554 if (nregs == 3 && a->a == 1) {
558 if (!vfp_access_check(s)) {
563 * VLD1 to all lanes: T bit indicates how many Dregs to write.
564 * VLD2/3/4 to all lanes: T bit indicates register stride.
566 stride = a->t ? 2 : 1;
567 vec_size = nregs == 1 ? stride * 8 : 8;
569 tmp = tcg_temp_new_i32();
570 addr = tcg_temp_new_i32();
571 load_reg_var(s, addr, a->rn);
572 for (reg = 0; reg < nregs; reg++) {
573 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
575 if ((vd & 1) && vec_size == 16) {
577 * We cannot write 16 bytes at once because the
578 * destination is unaligned.
580 tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
582 tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
583 neon_reg_offset(vd, 0), 8, 8);
585 tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
586 vec_size, vec_size, tmp);
588 tcg_gen_addi_i32(addr, addr, 1 << size);
591 tcg_temp_free_i32(tmp);
592 tcg_temp_free_i32(addr);
594 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
599 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
601 /* Neon load/store single structure to one lane */
603 int nregs = a->n + 1;
607 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
611 /* UNDEF accesses to D16-D31 if they don't exist */
612 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
616 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
619 if (((a->align & (1 << a->size)) != 0) ||
620 (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
625 if ((a->align & 1) != 0) {
630 if (a->size == 2 && (a->align & 2) != 0) {
635 if ((a->size == 2) && ((a->align & 3) == 3)) {
642 if ((vd + a->stride * (nregs - 1)) > 31) {
644 * Attempts to write off the end of the register file are
645 * UNPREDICTABLE; we choose to UNDEF because otherwise we would
646 * access off the end of the array that holds the register data.
651 if (!vfp_access_check(s)) {
655 tmp = tcg_temp_new_i32();
656 addr = tcg_temp_new_i32();
657 load_reg_var(s, addr, a->rn);
659 * TODO: if we implemented alignment exceptions, we should check
660 * addr against the alignment encoded in a->align here.
662 for (reg = 0; reg < nregs; reg++) {
664 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
665 s->be_data | a->size);
666 neon_store_element(vd, a->reg_idx, a->size, tmp);
668 neon_load_element(tmp, vd, a->reg_idx, a->size);
669 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
670 s->be_data | a->size);
673 tcg_gen_addi_i32(addr, addr, 1 << a->size);
675 tcg_temp_free_i32(addr);
676 tcg_temp_free_i32(tmp);
678 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
683 static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
685 int vec_size = a->q ? 16 : 8;
686 int rd_ofs = neon_reg_offset(a->vd, 0);
687 int rn_ofs = neon_reg_offset(a->vn, 0);
688 int rm_ofs = neon_reg_offset(a->vm, 0);
690 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
694 /* UNDEF accesses to D16-D31 if they don't exist. */
695 if (!dc_isar_feature(aa32_simd_r32, s) &&
696 ((a->vd | a->vn | a->vm) & 0x10)) {
700 if ((a->vn | a->vm | a->vd) & a->q) {
704 if (!vfp_access_check(s)) {
708 fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
712 #define DO_3SAME(INSN, FUNC) \
713 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
715 return do_3same(s, a, FUNC); \
718 DO_3SAME(VADD, tcg_gen_gvec_add)
719 DO_3SAME(VSUB, tcg_gen_gvec_sub)
720 DO_3SAME(VAND, tcg_gen_gvec_and)
721 DO_3SAME(VBIC, tcg_gen_gvec_andc)
722 DO_3SAME(VORR, tcg_gen_gvec_or)
723 DO_3SAME(VORN, tcg_gen_gvec_orc)
724 DO_3SAME(VEOR, tcg_gen_gvec_xor)
725 DO_3SAME(VSHL_S, gen_gvec_sshl)
726 DO_3SAME(VSHL_U, gen_gvec_ushl)
727 DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
728 DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
729 DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
730 DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
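/*
 * As an illustration, DO_3SAME(VADD, tcg_gen_gvec_add) expands to:
 *
 *   static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
 *   {
 *       return do_3same(s, a, tcg_gen_gvec_add);
 *   }
 */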
732 /* These insns are all gvec_bitsel but with the inputs in various orders. */
733 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
734 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
735 uint32_t rn_ofs, uint32_t rm_ofs, \
736 uint32_t oprsz, uint32_t maxsz) \
738 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
740 DO_3SAME(INSN, gen_##INSN##_3s)
742 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
743 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
744 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
746 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \
747 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
749 if (a->size == 3) { \
752 return do_3same(s, a, FUNC); \
755 DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
756 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
757 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
758 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
759 DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
760 DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
761 DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
762 DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
763 DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
764 DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
765 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
766 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
768 #define DO_3SAME_CMP(INSN, COND) \
769 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
770 uint32_t rn_ofs, uint32_t rm_ofs, \
771 uint32_t oprsz, uint32_t maxsz) \
773 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
775 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
777 DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
778 DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
779 DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
780 DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
781 DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
783 #define WRAP_OOL_FN(WRAPNAME, FUNC) \
784 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
785 uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
787 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
790 WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
792 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
797 return do_3same(s, a, gen_VMUL_p_3s);
800 #define DO_VQRDMLAH(INSN, FUNC) \
801 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
803 if (!dc_isar_feature(aa32_rdm, s)) { \
806 if (a->size != 1 && a->size != 2) { \
809 return do_3same(s, a, FUNC); \
812 DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
813 DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
815 #define DO_SHA1(NAME, FUNC) \
816 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
817 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
819 if (!dc_isar_feature(aa32_sha1, s)) { \
822 return do_3same(s, a, gen_##NAME##_3s); \
825 DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
826 DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
827 DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
828 DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
830 #define DO_SHA2(NAME, FUNC) \
831 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
832 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
834 if (!dc_isar_feature(aa32_sha2, s)) { \
837 return do_3same(s, a, gen_##NAME##_3s); \
840 DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
841 DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
842 DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
844 #define DO_3SAME_64(INSN, FUNC) \
845 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
846 uint32_t rn_ofs, uint32_t rm_ofs, \
847 uint32_t oprsz, uint32_t maxsz) \
849 static const GVecGen3 op = { .fni8 = FUNC }; \
850 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
852 DO_3SAME(INSN, gen_##INSN##_3s)
854 #define DO_3SAME_64_ENV(INSN, FUNC) \
855 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
857 FUNC(d, cpu_env, n, m); \
859 DO_3SAME_64(INSN, gen_##INSN##_elt)
861 DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
862 DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
863 DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
864 DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
865 DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
866 DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
868 #define DO_3SAME_32(INSN, FUNC) \
869 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
870 uint32_t rn_ofs, uint32_t rm_ofs, \
871 uint32_t oprsz, uint32_t maxsz) \
873 static const GVecGen3 ops[4] = { \
874 { .fni4 = gen_helper_neon_##FUNC##8 }, \
875 { .fni4 = gen_helper_neon_##FUNC##16 }, \
876 { .fni4 = gen_helper_neon_##FUNC##32 }, \
879 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
881 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
886 return do_3same(s, a, gen_##INSN##_3s); \
890 * Some helper functions need to be passed the cpu_env. In order
891 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
892 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
893 * and which call a NeonGenTwoOpEnvFn().
895 #define WRAP_ENV_FN(WRAPNAME, FUNC) \
896 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
898 FUNC(d, cpu_env, n, m); \
901 #define DO_3SAME_32_ENV(INSN, FUNC) \
902 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
903 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
904 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
905 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
906 uint32_t rn_ofs, uint32_t rm_ofs, \
907 uint32_t oprsz, uint32_t maxsz) \
909 static const GVecGen3 ops[4] = { \
910 { .fni4 = gen_##INSN##_tramp8 }, \
911 { .fni4 = gen_##INSN##_tramp16 }, \
912 { .fni4 = gen_##INSN##_tramp32 }, \
915 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
917 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
922 return do_3same(s, a, gen_##INSN##_3s); \
925 DO_3SAME_32(VHADD_S, hadd_s)
926 DO_3SAME_32(VHADD_U, hadd_u)
927 DO_3SAME_32(VHSUB_S, hsub_s)
928 DO_3SAME_32(VHSUB_U, hsub_u)
929 DO_3SAME_32(VRHADD_S, rhadd_s)
930 DO_3SAME_32(VRHADD_U, rhadd_u)
931 DO_3SAME_32(VRSHL_S, rshl_s)
932 DO_3SAME_32(VRSHL_U, rshl_u)
934 DO_3SAME_32_ENV(VQSHL_S, qshl_s)
935 DO_3SAME_32_ENV(VQSHL_U, qshl_u)
936 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
937 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
939 static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
941 /* Operations handled pairwise 32 bits at a time */
942 TCGv_i32 tmp, tmp2, tmp3;
944 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
948 /* UNDEF accesses to D16-D31 if they don't exist. */
949 if (!dc_isar_feature(aa32_simd_r32, s) &&
950 ((a->vd | a->vn | a->vm) & 0x10)) {
958 if (!vfp_access_check(s)) {
962 assert(a->q == 0); /* enforced by decode patterns */
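/*
 * The pairwise semantics: the low 32-bit word of Dd is produced by
 * combining the two words of Dn, and the high word by combining the
 * two words of Dm (with fn handling the packed sub-word elements).
 */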
965 * Note that we have to be careful not to clobber the source operands
966 * in the "vm == vd" case by storing the result of the first pass too
967 * early. Since Q is 0 there are always just two passes, so instead
968 * of a complicated loop over each pass we just unroll.
970 tmp = neon_load_reg(a->vn, 0);
971 tmp2 = neon_load_reg(a->vn, 1);
973 tcg_temp_free_i32(tmp2);
975 tmp3 = neon_load_reg(a->vm, 0);
976 tmp2 = neon_load_reg(a->vm, 1);
977 fn(tmp3, tmp3, tmp2);
978 tcg_temp_free_i32(tmp2);
980 neon_store_reg(a->vd, 0, tmp);
981 neon_store_reg(a->vd, 1, tmp3);
985 #define DO_3SAME_PAIR(INSN, func) \
986 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
988 static NeonGenTwoOpFn * const fns[] = { \
989 gen_helper_neon_##func##8, \
990 gen_helper_neon_##func##16, \
991 gen_helper_neon_##func##32, \
996 return do_3same_pair(s, a, fns[a->size]); \
999 /* 32-bit pairwise ops end up the same as the elementwise versions. */
1000 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
1001 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
1002 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
1003 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
1004 #define gen_helper_neon_padd_u32 tcg_gen_add_i32
1006 DO_3SAME_PAIR(VPMAX_S, pmax_s)
1007 DO_3SAME_PAIR(VPMIN_S, pmin_s)
1008 DO_3SAME_PAIR(VPMAX_U, pmax_u)
1009 DO_3SAME_PAIR(VPMIN_U, pmin_u)
1010 DO_3SAME_PAIR(VPADD, padd_u)
1012 #define DO_3SAME_VQDMULH(INSN, FUNC) \
1013 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
1014 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
1015 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
1016 uint32_t rn_ofs, uint32_t rm_ofs, \
1017 uint32_t oprsz, uint32_t maxsz) \
1019 static const GVecGen3 ops[2] = { \
1020 { .fni4 = gen_##INSN##_tramp16 }, \
1021 { .fni4 = gen_##INSN##_tramp32 }, \
1023 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
1025 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
1027 if (a->size != 1 && a->size != 2) { \
1030 return do_3same(s, a, gen_##INSN##_3s); \
1033 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
1034 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1036 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \
1037 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
1038 uint32_t rn_ofs, uint32_t rm_ofs, \
1039 uint32_t oprsz, uint32_t maxsz) \
1041 TCGv_ptr fpst = fpstatus_ptr(FPST); \
1042 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
1043 oprsz, maxsz, 0, FUNC); \
1044 tcg_temp_free_ptr(fpst); \
1047 #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \
1048 WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \
1049 WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
1050 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1052 if (a->size != 0) { \
1053 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1056 return do_3same(s, a, gen_##INSN##_fp16_3s); \
1058 return do_3same(s, a, gen_##INSN##_fp32_3s); \
1062 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
1063 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
1064 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
1065 DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
1066 DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
1067 DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
1068 DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
1069 DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
1070 DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
1071 DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
1072 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
1073 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
1074 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
1075 DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
1076 DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
1077 DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
1078 DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
1080 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
1081 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
1082 WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
1083 WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
1085 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
1087 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1092 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1095 return do_3same(s, a, gen_VMAXNM_fp16_3s);
1097 return do_3same(s, a, gen_VMAXNM_fp32_3s);
1100 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
1102 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1107 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1110 return do_3same(s, a, gen_VMINNM_fp16_3s);
1112 return do_3same(s, a, gen_VMINNM_fp32_3s);
1115 static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
1116 gen_helper_gvec_3_ptr *fn)
1118 /* FP pairwise operations */
1121 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1125 /* UNDEF accesses to D16-D31 if they don't exist. */
1126 if (!dc_isar_feature(aa32_simd_r32, s) &&
1127 ((a->vd | a->vn | a->vm) & 0x10)) {
1131 if (!vfp_access_check(s)) {
1135 assert(a->q == 0); /* enforced by decode patterns */
1138 fpstatus = fpstatus_ptr(a->size != 0 ? FPST_STD_F16 : FPST_STD);
1139 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
1140 vfp_reg_offset(1, a->vn),
1141 vfp_reg_offset(1, a->vm),
1142 fpstatus, 8, 8, 0, fn);
1143 tcg_temp_free_ptr(fpstatus);
1149 * For all the functions using this macro, size == 1 means fp16,
 * which requires the FP16 arithmetic extension (aa32_fp16_arith).
1152 #define DO_3S_FP_PAIR(INSN,FUNC) \
1153 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1155 if (a->size != 0) { \
1156 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1159 return do_3same_fp_pair(s, a, FUNC##h); \
1161 return do_3same_fp_pair(s, a, FUNC##s); \
1164 DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
1165 DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
1166 DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
1168 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
1170 /* Handle a 2-reg-shift insn which can be vectorized. */
1171 int vec_size = a->q ? 16 : 8;
1172 int rd_ofs = neon_reg_offset(a->vd, 0);
1173 int rm_ofs = neon_reg_offset(a->vm, 0);
1175 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1179 /* UNDEF accesses to D16-D31 if they don't exist. */
1180 if (!dc_isar_feature(aa32_simd_r32, s) &&
1181 ((a->vd | a->vm) & 0x10)) {
1185 if ((a->vm | a->vd) & a->q) {
1189 if (!vfp_access_check(s)) {
1193 fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
1197 #define DO_2SH(INSN, FUNC) \
1198 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1200 return do_vector_2sh(s, a, FUNC); \
1203 DO_2SH(VSHL, tcg_gen_gvec_shli)
1204 DO_2SH(VSLI, gen_gvec_sli)
1205 DO_2SH(VSRI, gen_gvec_sri)
1206 DO_2SH(VSRA_S, gen_gvec_ssra)
1207 DO_2SH(VSRA_U, gen_gvec_usra)
1208 DO_2SH(VRSHR_S, gen_gvec_srshr)
1209 DO_2SH(VRSHR_U, gen_gvec_urshr)
1210 DO_2SH(VRSRA_S, gen_gvec_srsra)
1211 DO_2SH(VRSRA_U, gen_gvec_ursra)
1213 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1215 /* Signed shift out of range results in all-sign-bits */
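/* e.g. VSHR.S8 #8 becomes an arithmetic shift by 7, which gives the same result */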
1216 a->shift = MIN(a->shift, (8 << a->size) - 1);
1217 return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1220 static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
1221 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1223 tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
1226 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1228 /* Shift out of range is architecturally valid and results in zero. */
1229 if (a->shift >= (8 << a->size)) {
1230 return do_vector_2sh(s, a, gen_zero_rd_2sh);
1232 return do_vector_2sh(s, a, tcg_gen_gvec_shri);
1236 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
1237 NeonGenTwo64OpEnvFn *fn)
1240 * 2-reg-and-shift operations, size == 3 case, where the
1241 * function needs to be passed cpu_env.
1246 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1250 /* UNDEF accesses to D16-D31 if they don't exist. */
1251 if (!dc_isar_feature(aa32_simd_r32, s) &&
1252 ((a->vd | a->vm) & 0x10)) {
1256 if ((a->vm | a->vd) & a->q) {
1260 if (!vfp_access_check(s)) {
1265 * To avoid excessive duplication of ops we implement shift
1266 * by immediate using the variable shift operations.
1268 constimm = tcg_const_i64(dup_const(a->size, a->shift));
1270 for (pass = 0; pass < a->q + 1; pass++) {
1271 TCGv_i64 tmp = tcg_temp_new_i64();
1273 neon_load_reg64(tmp, a->vm + pass);
1274 fn(tmp, cpu_env, tmp, constimm);
1275 neon_store_reg64(tmp, a->vd + pass);
1276 tcg_temp_free_i64(tmp);
1278 tcg_temp_free_i64(constimm);
1282 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
1283 NeonGenTwoOpEnvFn *fn)
1286 * 2-reg-and-shift operations, size < 3 case, where the
1287 * helper needs to be passed cpu_env.
1292 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1296 /* UNDEF accesses to D16-D31 if they don't exist. */
1297 if (!dc_isar_feature(aa32_simd_r32, s) &&
1298 ((a->vd | a->vm) & 0x10)) {
1302 if ((a->vm | a->vd) & a->q) {
1306 if (!vfp_access_check(s)) {
1311 * To avoid excessive duplication of ops we implement shift
1312 * by immediate using the variable shift operations.
1314 constimm = tcg_const_i32(dup_const(a->size, a->shift));
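/*
 * dup_const() replicates the shift count into every lane of the 32-bit
 * value, because the variable-shift helpers expect a per-lane shift
 * count packed the same way as the data (e.g. four byte-sized counts
 * for the 8-bit helpers).
 */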
1316 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
1317 TCGv_i32 tmp = neon_load_reg(a->vm, pass);
1318 fn(tmp, cpu_env, tmp, constimm);
1319 neon_store_reg(a->vd, pass, tmp);
1321 tcg_temp_free_i32(constimm);
1325 #define DO_2SHIFT_ENV(INSN, FUNC) \
1326 static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
1328 return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
1330 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1332 static NeonGenTwoOpEnvFn * const fns[] = { \
1333 gen_helper_neon_##FUNC##8, \
1334 gen_helper_neon_##FUNC##16, \
1335 gen_helper_neon_##FUNC##32, \
1337 assert(a->size < ARRAY_SIZE(fns)); \
1338 return do_2shift_env_32(s, a, fns[a->size]); \
1341 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
1342 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
1343 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1345 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
1346 NeonGenTwo64OpFn *shiftfn,
1347 NeonGenNarrowEnvFn *narrowfn)
1349 /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
1350 TCGv_i64 constimm, rm1, rm2;
1353 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1357 /* UNDEF accesses to D16-D31 if they don't exist. */
1358 if (!dc_isar_feature(aa32_simd_r32, s) &&
1359 ((a->vd | a->vm) & 0x10)) {
1367 if (!vfp_access_check(s)) {
1372 * This is always a right shift, and the shiftfn is always a
1373 * left-shift helper, which thus needs the negated shift count.
1375 constimm = tcg_const_i64(-a->shift);
1376 rm1 = tcg_temp_new_i64();
1377 rm2 = tcg_temp_new_i64();
1379 /* Load both inputs first to avoid potential overwrite if rm == rd */
1380 neon_load_reg64(rm1, a->vm);
1381 neon_load_reg64(rm2, a->vm + 1);
1383 shiftfn(rm1, rm1, constimm);
1384 rd = tcg_temp_new_i32();
1385 narrowfn(rd, cpu_env, rm1);
1386 neon_store_reg(a->vd, 0, rd);
1388 shiftfn(rm2, rm2, constimm);
1389 rd = tcg_temp_new_i32();
1390 narrowfn(rd, cpu_env, rm2);
1391 neon_store_reg(a->vd, 1, rd);
1393 tcg_temp_free_i64(rm1);
1394 tcg_temp_free_i64(rm2);
1395 tcg_temp_free_i64(constimm);
1400 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
1401 NeonGenTwoOpFn *shiftfn,
1402 NeonGenNarrowEnvFn *narrowfn)
1404 /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
1405 TCGv_i32 constimm, rm1, rm2, rm3, rm4;
1409 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1413 /* UNDEF accesses to D16-D31 if they don't exist. */
1414 if (!dc_isar_feature(aa32_simd_r32, s) &&
1415 ((a->vd | a->vm) & 0x10)) {
1423 if (!vfp_access_check(s)) {
1428 * This is always a right shift, and the shiftfn is always a
1429 * left-shift helper, which thus needs the negated shift count
1430 * duplicated into each lane of the immediate value.
1433 imm = (uint16_t)(-a->shift);
1439 constimm = tcg_const_i32(imm);
1441 /* Load all inputs first to avoid potential overwrite */
1442 rm1 = neon_load_reg(a->vm, 0);
1443 rm2 = neon_load_reg(a->vm, 1);
1444 rm3 = neon_load_reg(a->vm + 1, 0);
1445 rm4 = neon_load_reg(a->vm + 1, 1);
1446 rtmp = tcg_temp_new_i64();
1448 shiftfn(rm1, rm1, constimm);
1449 shiftfn(rm2, rm2, constimm);
1451 tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
1452 tcg_temp_free_i32(rm2);
1454 narrowfn(rm1, cpu_env, rtmp);
1455 neon_store_reg(a->vd, 0, rm1);
1457 shiftfn(rm3, rm3, constimm);
1458 shiftfn(rm4, rm4, constimm);
1459 tcg_temp_free_i32(constimm);
1461 tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
1462 tcg_temp_free_i32(rm4);
1464 narrowfn(rm3, cpu_env, rtmp);
1465 tcg_temp_free_i64(rtmp);
1466 neon_store_reg(a->vd, 1, rm3);
1470 #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
1471 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1473 return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
1475 #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
1476 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1478 return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
1481 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1483 tcg_gen_extrl_i64_i32(dest, src);
1486 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1488 gen_helper_neon_narrow_u16(dest, src);
1491 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1493 gen_helper_neon_narrow_u8(dest, src);
1496 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
1497 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
1498 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
1500 DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
1501 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
1502 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
1504 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
1505 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
1506 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
1508 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
1509 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
1510 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
1511 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
1512 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
1513 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
1515 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
1516 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
1517 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
1519 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
1520 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
1521 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
1523 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
1524 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
1525 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1527 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
1528 NeonGenWidenFn *widenfn, bool u)
1532 uint64_t widen_mask = 0;
1534 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1538 /* UNDEF accesses to D16-D31 if they don't exist. */
1539 if (!dc_isar_feature(aa32_simd_r32, s) &&
1540 ((a->vd | a->vm) & 0x10)) {
1548 if (!vfp_access_check(s)) {
1553 * This is a widen-and-shift operation. The shift is always less
1554 * than the width of the source type, so after widening the input
1555 * vector we can simply shift the whole 64-bit widened register,
1556 * and then clear the potential overflow bits resulting from left
1557 * bits of the narrow input appearing as right bits of the left
1558 * neighbour narrow input. Calculate a mask of bits to clear.
1560 if ((a->shift != 0) && (a->size < 2 || u)) {
1561 int esize = 8 << a->size;
1562 widen_mask = MAKE_64BIT_MASK(0, esize);
1563 widen_mask >>= esize - a->shift;
1564 widen_mask = dup_const(a->size + 1, widen_mask);
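/*
 * For example, VSHLL.U8 #3 gives widen_mask = 0x0007000700070007: after
 * the 64-bit left shift, the low 3 bits of each 16-bit lane hold bits
 * that slid across from the element below, and they are cleared by the
 * andi with ~widen_mask.
 */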
1567 rm0 = neon_load_reg(a->vm, 0);
1568 rm1 = neon_load_reg(a->vm, 1);
1569 tmp = tcg_temp_new_i64();
1572 tcg_temp_free_i32(rm0);
1573 if (a->shift != 0) {
1574 tcg_gen_shli_i64(tmp, tmp, a->shift);
1575 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1577 neon_store_reg64(tmp, a->vd);
1580 tcg_temp_free_i32(rm1);
1581 if (a->shift != 0) {
1582 tcg_gen_shli_i64(tmp, tmp, a->shift);
1583 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1585 neon_store_reg64(tmp, a->vd + 1);
1586 tcg_temp_free_i64(tmp);
1590 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
1592 static NeonGenWidenFn * const widenfn[] = {
1593 gen_helper_neon_widen_s8,
1594 gen_helper_neon_widen_s16,
1595 tcg_gen_ext_i32_i64,
1597 return do_vshll_2sh(s, a, widenfn[a->size], false);
1600 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
1602 static NeonGenWidenFn * const widenfn[] = {
1603 gen_helper_neon_widen_u8,
1604 gen_helper_neon_widen_u16,
1605 tcg_gen_extu_i32_i64,
1607 return do_vshll_2sh(s, a, widenfn[a->size], true);
1610 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
1611 gen_helper_gvec_2_ptr *fn)
1613 /* FP operations in 2-reg-and-shift group */
1614 int vec_size = a->q ? 16 : 8;
1615 int rd_ofs = neon_reg_offset(a->vd, 0);
1616 int rm_ofs = neon_reg_offset(a->vm, 0);
1619 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1624 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1629 /* UNDEF accesses to D16-D31 if they don't exist. */
1630 if (!dc_isar_feature(aa32_simd_r32, s) &&
1631 ((a->vd | a->vm) & 0x10)) {
1635 if ((a->vm | a->vd) & a->q) {
1639 if (!vfp_access_check(s)) {
1643 fpst = fpstatus_ptr(a->size ? FPST_STD_F16 : FPST_STD);
1644 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
1645 tcg_temp_free_ptr(fpst);
1649 #define DO_FP_2SH(INSN, FUNC) \
1650 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1652 return do_fp_2sh(s, a, FUNC); \
1655 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
1656 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
1657 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
1658 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
1660 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
1661 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
1662 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
1663 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1665 static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
1668 * Expand the encoded constant.
1669 * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
1670 * We choose to not special-case this and will behave as if a
1671 * valid constant encoding of 0 had been given.
1672 * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
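 * As an example, cmode = 12, op = 0, imm = 0x12 expands to
 * (0x12 << 8) | 0xff = 0x12ff, which dup_const(MO_32, ...) then
 * replicates to 0x000012ff000012ff.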
1691 imm = (imm << 8) | (imm << 24);
1694 imm = (imm << 8) | 0xff;
1697 imm = (imm << 16) | 0xffff;
1702 * This is the only case where the top and bottom 32 bits
1703 * of the encoded constant differ.
1708 for (n = 0; n < 8; n++) {
1709 if (imm & (1 << n)) {
1710 imm64 |= (0xffULL << (n * 8));
1715 imm |= (imm << 8) | (imm << 16) | (imm << 24);
1718 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
1719 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
1725 return dup_const(MO_32, imm);
1728 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
1732 int reg_ofs, vec_size;
1734 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1738 /* UNDEF accesses to D16-D31 if they don't exist. */
1739 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1747 if (!vfp_access_check(s)) {
1751 reg_ofs = neon_reg_offset(a->vd, 0);
1752 vec_size = a->q ? 16 : 8;
1753 imm = asimd_imm_const(a->imm, a->cmode, a->op);
1755 fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
1759 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
1760 int64_t c, uint32_t oprsz, uint32_t maxsz)
1762 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
1765 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1767 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1770 if ((a->cmode & 1) && a->cmode < 12) {
1771 /* for op=1, the imm will be inverted, so BIC becomes AND. */
1772 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1774 /* There is one unallocated cmode/op combination in this space */
1775 if (a->cmode == 15 && a->op == 1) {
1780 return do_1reg_imm(s, a, fn);
1783 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
1784 NeonGenWidenFn *widenfn,
1785 NeonGenTwo64OpFn *opfn,
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
1789 TCGv_i64 rn0_64, rn1_64, rm_64;
1792 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1796 /* UNDEF accesses to D16-D31 if they don't exist. */
1797 if (!dc_isar_feature(aa32_simd_r32, s) &&
1798 ((a->vd | a->vn | a->vm) & 0x10)) {
1802 if (!widenfn || !opfn) {
1803 /* size == 3 case, which is an entirely different insn group */
1807 if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
1811 if (!vfp_access_check(s)) {
1815 rn0_64 = tcg_temp_new_i64();
1816 rn1_64 = tcg_temp_new_i64();
1817 rm_64 = tcg_temp_new_i64();
1820 neon_load_reg64(rn0_64, a->vn);
1822 TCGv_i32 tmp = neon_load_reg(a->vn, 0);
1823 widenfn(rn0_64, tmp);
1824 tcg_temp_free_i32(tmp);
1826 rm = neon_load_reg(a->vm, 0);
1829 tcg_temp_free_i32(rm);
1830 opfn(rn0_64, rn0_64, rm_64);
1833 * Load second pass inputs before storing the first pass result, to
1834 * avoid incorrect results if a narrow input overlaps with the result.
1837 neon_load_reg64(rn1_64, a->vn + 1);
1839 TCGv_i32 tmp = neon_load_reg(a->vn, 1);
1840 widenfn(rn1_64, tmp);
1841 tcg_temp_free_i32(tmp);
1843 rm = neon_load_reg(a->vm, 1);
1845 neon_store_reg64(rn0_64, a->vd);
1848 tcg_temp_free_i32(rm);
1849 opfn(rn1_64, rn1_64, rm_64);
1850 neon_store_reg64(rn1_64, a->vd + 1);
1852 tcg_temp_free_i64(rn0_64);
1853 tcg_temp_free_i64(rn1_64);
1854 tcg_temp_free_i64(rm_64);
1859 #define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
1860 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
1862 static NeonGenWidenFn * const widenfn[] = { \
1863 gen_helper_neon_widen_##S##8, \
1864 gen_helper_neon_widen_##S##16, \
1865 tcg_gen_##EXT##_i32_i64, \
1868 static NeonGenTwo64OpFn * const addfn[] = { \
1869 gen_helper_neon_##OP##l_u16, \
1870 gen_helper_neon_##OP##l_u32, \
1871 tcg_gen_##OP##_i64, \
1874 return do_prewiden_3d(s, a, widenfn[a->size], \
1875 addfn[a->size], SRC1WIDE); \
1878 DO_PREWIDEN(VADDL_S, s, ext, add, false)
1879 DO_PREWIDEN(VADDL_U, u, extu, add, false)
1880 DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
1881 DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
1882 DO_PREWIDEN(VADDW_S, s, ext, add, true)
1883 DO_PREWIDEN(VADDW_U, u, extu, add, true)
1884 DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
1885 DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
1887 static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
1888 NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
1890 /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
1891 TCGv_i64 rn_64, rm_64;
1894 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1898 /* UNDEF accesses to D16-D31 if they don't exist. */
1899 if (!dc_isar_feature(aa32_simd_r32, s) &&
1900 ((a->vd | a->vn | a->vm) & 0x10)) {
1904 if (!opfn || !narrowfn) {
1905 /* size == 3 case, which is an entirely different insn group */
1909 if ((a->vn | a->vm) & 1) {
1913 if (!vfp_access_check(s)) {
1917 rn_64 = tcg_temp_new_i64();
1918 rm_64 = tcg_temp_new_i64();
1919 rd0 = tcg_temp_new_i32();
1920 rd1 = tcg_temp_new_i32();
1922 neon_load_reg64(rn_64, a->vn);
1923 neon_load_reg64(rm_64, a->vm);
1925 opfn(rn_64, rn_64, rm_64);
1927 narrowfn(rd0, rn_64);
1929 neon_load_reg64(rn_64, a->vn + 1);
1930 neon_load_reg64(rm_64, a->vm + 1);
1932 opfn(rn_64, rn_64, rm_64);
1934 narrowfn(rd1, rn_64);
1936 neon_store_reg(a->vd, 0, rd0);
1937 neon_store_reg(a->vd, 1, rd1);
1939 tcg_temp_free_i64(rn_64);
1940 tcg_temp_free_i64(rm_64);
1945 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
1946 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
1948 static NeonGenTwo64OpFn * const addfn[] = { \
1949 gen_helper_neon_##OP##l_u16, \
1950 gen_helper_neon_##OP##l_u32, \
1951 tcg_gen_##OP##_i64, \
1954 static NeonGenNarrowFn * const narrowfn[] = { \
1955 gen_helper_neon_##NARROWTYPE##_high_u8, \
1956 gen_helper_neon_##NARROWTYPE##_high_u16, \
1960 return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
1963 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
1965 tcg_gen_addi_i64(rn, rn, 1u << 31);
1966 tcg_gen_extrh_i64_i32(rd, rn);
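/*
 * Adding 1 << 31 before extracting the high half implements the rounding
 * required by VRADDHN/VRSUBHN in the size == 2 case (64-bit intermediate
 * narrowed to its high 32 bits); the 8- and 16-bit cases use the
 * narrow_round_high_u8/u16 helpers instead.
 */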
1969 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
1970 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
1971 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
1972 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
1974 static bool do_long_3d(DisasContext *s, arg_3diff *a,
1975 NeonGenTwoOpWidenFn *opfn,
1976 NeonGenTwo64OpFn *accfn)
1979 * 3-regs different lengths, long operations.
1980 * These perform an operation on two inputs that returns a double-width
1981 * result, and then possibly perform an accumulation operation of
1982 * that result into the double-width destination.
1984 TCGv_i64 rd0, rd1, tmp;
1987 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1991 /* UNDEF accesses to D16-D31 if they don't exist. */
1992 if (!dc_isar_feature(aa32_simd_r32, s) &&
1993 ((a->vd | a->vn | a->vm) & 0x10)) {
1998 /* size == 3 case, which is an entirely different insn group */
2006 if (!vfp_access_check(s)) {
2010 rd0 = tcg_temp_new_i64();
2011 rd1 = tcg_temp_new_i64();
2013 rn = neon_load_reg(a->vn, 0);
2014 rm = neon_load_reg(a->vm, 0);
2016 tcg_temp_free_i32(rn);
2017 tcg_temp_free_i32(rm);
2019 rn = neon_load_reg(a->vn, 1);
2020 rm = neon_load_reg(a->vm, 1);
2022 tcg_temp_free_i32(rn);
2023 tcg_temp_free_i32(rm);
2025 /* Don't store results until after all loads: they might overlap */
2027 tmp = tcg_temp_new_i64();
2028 neon_load_reg64(tmp, a->vd);
2029 accfn(tmp, tmp, rd0);
2030 neon_store_reg64(tmp, a->vd);
2031 neon_load_reg64(tmp, a->vd + 1);
2032 accfn(tmp, tmp, rd1);
2033 neon_store_reg64(tmp, a->vd + 1);
2034 tcg_temp_free_i64(tmp);
2036 neon_store_reg64(rd0, a->vd);
2037 neon_store_reg64(rd1, a->vd + 1);
2040 tcg_temp_free_i64(rd0);
2041 tcg_temp_free_i64(rd1);
2046 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
2048 static NeonGenTwoOpWidenFn * const opfn[] = {
2049 gen_helper_neon_abdl_s16,
2050 gen_helper_neon_abdl_s32,
2051 gen_helper_neon_abdl_s64,
2055 return do_long_3d(s, a, opfn[a->size], NULL);
2058 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
2060 static NeonGenTwoOpWidenFn * const opfn[] = {
2061 gen_helper_neon_abdl_u16,
2062 gen_helper_neon_abdl_u32,
2063 gen_helper_neon_abdl_u64,
2067 return do_long_3d(s, a, opfn[a->size], NULL);
2070 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
2072 static NeonGenTwoOpWidenFn * const opfn[] = {
2073 gen_helper_neon_abdl_s16,
2074 gen_helper_neon_abdl_s32,
2075 gen_helper_neon_abdl_s64,
2078 static NeonGenTwo64OpFn * const addfn[] = {
2079 gen_helper_neon_addl_u16,
2080 gen_helper_neon_addl_u32,
2085 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2088 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
2090 static NeonGenTwoOpWidenFn * const opfn[] = {
2091 gen_helper_neon_abdl_u16,
2092 gen_helper_neon_abdl_u32,
2093 gen_helper_neon_abdl_u64,
2096 static NeonGenTwo64OpFn * const addfn[] = {
2097 gen_helper_neon_addl_u16,
2098 gen_helper_neon_addl_u32,
2103 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2106 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2108 TCGv_i32 lo = tcg_temp_new_i32();
2109 TCGv_i32 hi = tcg_temp_new_i32();
2111 tcg_gen_muls2_i32(lo, hi, rn, rm);
2112 tcg_gen_concat_i32_i64(rd, lo, hi);
2114 tcg_temp_free_i32(lo);
2115 tcg_temp_free_i32(hi);
2118 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2120 TCGv_i32 lo = tcg_temp_new_i32();
2121 TCGv_i32 hi = tcg_temp_new_i32();
2123 tcg_gen_mulu2_i32(lo, hi, rn, rm);
2124 tcg_gen_concat_i32_i64(rd, lo, hi);
2126 tcg_temp_free_i32(lo);
2127 tcg_temp_free_i32(hi);
2130 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
2132 static NeonGenTwoOpWidenFn * const opfn[] = {
2133 gen_helper_neon_mull_s8,
2134 gen_helper_neon_mull_s16,
2139 return do_long_3d(s, a, opfn[a->size], NULL);
2142 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2144 static NeonGenTwoOpWidenFn * const opfn[] = {
2145 gen_helper_neon_mull_u8,
2146 gen_helper_neon_mull_u16,
2151 return do_long_3d(s, a, opfn[a->size], NULL);
2154 #define DO_VMLAL(INSN,MULL,ACC) \
2155 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
2157 static NeonGenTwoOpWidenFn * const opfn[] = { \
2158 gen_helper_neon_##MULL##8, \
2159 gen_helper_neon_##MULL##16, \
2163 static NeonGenTwo64OpFn * const accfn[] = { \
2164 gen_helper_neon_##ACC##l_u16, \
2165 gen_helper_neon_##ACC##l_u32, \
2166 tcg_gen_##ACC##_i64, \
2169 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
2172 DO_VMLAL(VMLAL_S,mull_s,add)
2173 DO_VMLAL(VMLAL_U,mull_u,add)
2174 DO_VMLAL(VMLSL_S,mull_s,sub)
2175 DO_VMLAL(VMLSL_U,mull_u,sub)
2177 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2179 gen_helper_neon_mull_s16(rd, rn, rm);
2180 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
2183 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2185 gen_mull_s32(rd, rn, rm);
2186 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
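/*
 * The doubling in VQDMULL is implemented as a saturating add of the
 * widened product to itself (rd + rd), which also sets QC if the
 * doubled product saturates.
 */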
2189 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
2191 static NeonGenTwoOpWidenFn * const opfn[] = {
2198 return do_long_3d(s, a, opfn[a->size], NULL);
2201 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2203 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2206 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2208 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2211 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2213 static NeonGenTwoOpWidenFn * const opfn[] = {
2219 static NeonGenTwo64OpFn * const accfn[] = {
2226 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2229 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2231 gen_helper_neon_negl_u32(rm, rm);
2232 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2235 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2237 tcg_gen_neg_i64(rm, rm);
2238 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2241 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2243 static NeonGenTwoOpWidenFn * const opfn[] = {
2249 static NeonGenTwo64OpFn * const accfn[] = {
2256 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2259 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
2261 gen_helper_gvec_3 *fn_gvec;
2263 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2267 /* UNDEF accesses to D16-D31 if they don't exist. */
2268 if (!dc_isar_feature(aa32_simd_r32, s) &&
2269 ((a->vd | a->vn | a->vm) & 0x10)) {
2279 fn_gvec = gen_helper_neon_pmull_h;
2282 if (!dc_isar_feature(aa32_pmull, s)) {
2285 fn_gvec = gen_helper_gvec_pmull_q;
2291 if (!vfp_access_check(s)) {
2295 tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
2296 neon_reg_offset(a->vn, 0),
2297 neon_reg_offset(a->vm, 0),
2298 16, 16, 0, fn_gvec);
2302 static void gen_neon_dup_low16(TCGv_i32 var)
2304 TCGv_i32 tmp = tcg_temp_new_i32();
2305 tcg_gen_ext16u_i32(var, var);
2306 tcg_gen_shli_i32(tmp, var, 16);
2307 tcg_gen_or_i32(var, var, tmp);
2308 tcg_temp_free_i32(tmp);
2311 static void gen_neon_dup_high16(TCGv_i32 var)
2313 TCGv_i32 tmp = tcg_temp_new_i32();
2314 tcg_gen_andi_i32(var, var, 0xffff0000);
2315 tcg_gen_shri_i32(tmp, var, 16);
2316 tcg_gen_or_i32(var, var, tmp);
2317 tcg_temp_free_i32(tmp);
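/*
 * In neon_get_scalar() below, 'reg' packs the scalar encoding: for
 * 16-bit scalars bits [2:0] select the D register, bit 3 the 16-bit
 * half within the 32-bit word and bit 4 the word; for 32-bit scalars
 * bits [3:0] select the D register and bit 4 the word.
 */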
2320 static inline TCGv_i32 neon_get_scalar(int size, int reg)
2324 tmp = neon_load_reg(reg & 7, reg >> 4);
2326 gen_neon_dup_high16(tmp);
2328 gen_neon_dup_low16(tmp);
2331 tmp = neon_load_reg(reg & 15, reg >> 4);
2336 static bool do_2scalar(DisasContext *s, arg_2scalar *a,
2337 NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
2340 * Two registers and a scalar: perform an operation between
2341 * the input elements and the scalar, and then possibly
 * perform an accumulation operation of that result into the destination.
2348 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2352 /* UNDEF accesses to D16-D31 if they don't exist. */
2353 if (!dc_isar_feature(aa32_simd_r32, s) &&
2354 ((a->vd | a->vn | a->vm) & 0x10)) {
2359 /* Bad size (including size == 3, which is a different insn group) */
2363 if (a->q && ((a->vd | a->vn) & 1)) {
2367 if (!vfp_access_check(s)) {
2371 scalar = neon_get_scalar(a->size, a->vm);
2373 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2374 TCGv_i32 tmp = neon_load_reg(a->vn, pass);
2375 opfn(tmp, tmp, scalar);
2377 TCGv_i32 rd = neon_load_reg(a->vd, pass);
2378 accfn(tmp, rd, tmp);
2379 tcg_temp_free_i32(rd);
2381 neon_store_reg(a->vd, pass, tmp);
2383 tcg_temp_free_i32(scalar);
2387 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2389 static NeonGenTwoOpFn * const opfn[] = {
2391 gen_helper_neon_mul_u16,
2396 return do_2scalar(s, a, opfn[a->size], NULL);
2399 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2401 static NeonGenTwoOpFn * const opfn[] = {
2403 gen_helper_neon_mul_u16,
2407 static NeonGenTwoOpFn * const accfn[] = {
2409 gen_helper_neon_add_u16,
2414 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2417 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2419 static NeonGenTwoOpFn * const opfn[] = {
2421 gen_helper_neon_mul_u16,
2425 static NeonGenTwoOpFn * const accfn[] = {
2427 gen_helper_neon_sub_u16,
2432 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2435 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
2436 gen_helper_gvec_3_ptr *fn)
2438 /* Two registers and a scalar, using gvec */
2439 int vec_size = a->q ? 16 : 8;
2440 int rd_ofs = neon_reg_offset(a->vd, 0);
2441 int rn_ofs = neon_reg_offset(a->vn, 0);
2446 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2450 /* UNDEF accesses to D16-D31 if they don't exist. */
2451 if (!dc_isar_feature(aa32_simd_r32, s) &&
2452 ((a->vd | a->vn | a->vm) & 0x10)) {
2457 /* Bad size (including size == 3, which is a different insn group) */
2461 if (a->q && ((a->vd | a->vn) & 1)) {
2465 if (!vfp_access_check(s)) {
2469 /* a->vm is M:Vm, which encodes both register and index */
2470 idx = extract32(a->vm, a->size + 2, 2);
2471 a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_reg_offset(a->vm, 0);

    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    tcg_temp_free_ptr(fpstatus);

#define DO_VMUL_F_2sc(NAME, FUNC) \
    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
        static gen_helper_gvec_3_ptr * const opfn[] = { \
            gen_helper_##FUNC##_h, \
            gen_helper_##FUNC##_s, \
        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
        return do_2scalar_fp_vec(s, a, opfn[a->size]); \

DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)

WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)

static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpFn * const opfn[] = {
    return do_2scalar(s, a, opfn[a->size], NULL);

static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpFn * const opfn[] = {
    return do_2scalar(s, a, opfn[a->size], NULL);

static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
     */
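    /*
     * Illustrative note (assuming the architected VQRDMLAH semantics):
     * the accumulation with rd is folded in before the single final
     * saturation, which is not the same as a VQRDMULH followed by a
     * separate saturating add.
     */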
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    if (!dc_isar_feature(aa32_rdm, s)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    /* Bad size (including size == 3, which is a different insn group) */
    if (a->q && ((a->vd | a->vn) & 1)) {
    if (!vfp_access_check(s)) {
    scalar = neon_get_scalar(a->size, a->vm);
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 rn = neon_load_reg(a->vn, pass);
        TCGv_i32 rd = neon_load_reg(a->vd, pass);
        opfn(rd, cpu_env, rn, scalar, rd);
        tcg_temp_free_i32(rn);
        neon_store_reg(a->vd, pass, rd);
    tcg_temp_free_i32(scalar);

static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenThreeOpEnvFn *opfn[] = {
        gen_helper_neon_qrdmlah_s16,
        gen_helper_neon_qrdmlah_s32,
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);

static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenThreeOpEnvFn *opfn[] = {
        gen_helper_neon_qrdmlsh_s16,
        gen_helper_neon_qrdmlsh_s32,
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);

static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
                            NeonGenTwoOpWidenFn *opfn,
                            NeonGenTwo64OpFn *accfn)
{
    /*
     * Two registers and a scalar, long operations: perform an
     * operation on the input elements and the scalar which produces
     * a double-width result, and then possibly perform an accumulation
     * operation of that result into the destination.
     */
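    /*
     * Illustrative example (assuming the usual Neon semantics):
     * VMLAL.S16 Qd, Dn, Dm[x] widens each 16-bit product Dn[i] * Dm[x]
     * to 32 bits and adds it into the corresponding 32-bit lane of Qd.
     */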
    TCGv_i32 scalar, rn;
    TCGv_i64 rn0_64, rn1_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    /* Bad size (including size == 3, which is a different insn group) */
    if (!vfp_access_check(s)) {
    scalar = neon_get_scalar(a->size, a->vm);

    /* Load all inputs before writing any outputs, in case of overlap */
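    /*
     * Illustrative example: for VMULL.S16 Q0, D1, D0[0] the
     * destination Q0 overlaps the source D1, so writing the result
     * before both input halves have been read would corrupt the
     * second input.
     */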
    rn = neon_load_reg(a->vn, 0);
    rn0_64 = tcg_temp_new_i64();
    opfn(rn0_64, rn, scalar);
    tcg_temp_free_i32(rn);

    rn = neon_load_reg(a->vn, 1);
    rn1_64 = tcg_temp_new_i64();
    opfn(rn1_64, rn, scalar);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(scalar);

        TCGv_i64 t64 = tcg_temp_new_i64();
        neon_load_reg64(t64, a->vd);
        accfn(t64, t64, rn0_64);
        neon_store_reg64(t64, a->vd);
        neon_load_reg64(t64, a->vd + 1);
        accfn(t64, t64, rn1_64);
        neon_store_reg64(t64, a->vd + 1);
        tcg_temp_free_i64(t64);
        neon_store_reg64(rn0_64, a->vd);
        neon_store_reg64(rn1_64, a->vd + 1);
    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);

static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_s16,
    return do_2scalar_long(s, a, opfn[a->size], NULL);

static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_u16,
    return do_2scalar_long(s, a, opfn[a->size], NULL);

#define DO_VMLAL_2SC(INSN, MULL, ACC) \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
        static NeonGenTwoOpWidenFn * const opfn[] = { \
            gen_helper_neon_##MULL##16, \
        static NeonGenTwo64OpFn * const accfn[] = { \
            gen_helper_neon_##ACC##l_u32, \
            tcg_gen_##ACC##_i64, \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \

DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)

static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
    return do_2scalar_long(s, a, opfn[a->size], NULL);

static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
    static NeonGenTwo64OpFn * const accfn[] = {
    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);

static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
    static NeonGenTwo64OpFn * const accfn[] = {
    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);

static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    if ((a->vn | a->vm | a->vd) & a->q) {
    if (a->imm > 7 && !a->q) {
    if (!vfp_access_check(s)) {
        /* Extract 64 bits from <Vm:Vn> */
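        /*
         * Illustrative example (assuming the usual VEXT semantics):
         * VEXT.8 Dd, Dn, Dm, #3 places Dn bytes 3..7 in Dd bytes 0..4
         * and Dm bytes 0..2 in Dd bytes 5..7, i.e. the low 64 bits of
         * (Dm:Dn) >> 24.
         */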
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        neon_load_reg64(right, a->vn);
        neon_load_reg64(left, a->vm);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        neon_store_reg64(dest, a->vd);

        tcg_temp_free_i64(left);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(dest);
        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
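        /*
         * Illustrative note: the two sub-cases below cover imm < 8,
         * where the low half of the result starts inside Vn:Vn+1, and
         * imm >= 8, where it starts inside Vn+1:Vm.
         */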
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

            neon_load_reg64(right, a->vn);
            neon_load_reg64(middle, a->vn + 1);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            neon_load_reg64(left, a->vm);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
            neon_load_reg64(right, a->vn + 1);
            neon_load_reg64(middle, a->vm);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            neon_load_reg64(left, a->vm + 1);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);

        neon_store_reg64(destright, a->vd);
        neon_store_reg64(destleft, a->vd + 1);

        tcg_temp_free_i64(destright);
        tcg_temp_free_i64(destleft);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(middle);
        tcg_temp_free_i64(left);

static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
    TCGv_i32 tmp, tmp2, tmp3, tmp4;
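    /*
     * Illustrative note (assuming the usual VTBL/VTBX semantics): each
     * byte of Vm indexes into the table formed by the consecutive
     * registers starting at Vn; an out-of-range index yields zero for
     * VTBL but leaves the destination byte unchanged for VTBX, which
     * is why Vd is loaded as the fallback value below.
     */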
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    if ((a->vn + n) > 32) {
        /*
         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
         * helper function running off the end of the register file.
         */
        tmp = neon_load_reg(a->vd, 0);
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
    tmp2 = neon_load_reg(a->vm, 0);
    ptr1 = vfp_reg_ptr(true, a->vn);
    tmp4 = tcg_const_i32(n);
    gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
    tcg_temp_free_i32(tmp);

        tmp = neon_load_reg(a->vd, 1);
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
    tmp3 = neon_load_reg(a->vm, 1);
    gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
    tcg_temp_free_i32(tmp4);
    tcg_temp_free_ptr(ptr1);
    neon_store_reg(a->vd, 0, tmp2);
    neon_store_reg(a->vd, 1, tmp3);
    tcg_temp_free_i32(tmp);

static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
                         neon_element_offset(a->vm, a->index, a->size),
                         a->q ? 16 : 8, a->q ? 16 : 8);

static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        for (half = 0; half < 2; half++) {
            tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
                gen_swap_half(tmp[half], tmp[half]);
                g_assert_not_reached();
        neon_store_reg(a->vd, pass * 2, tmp[1]);
        neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);

static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
                              NeonGenWidenFn *widenfn,
                              NeonGenTwo64OpFn *opfn,
                              NeonGenTwo64OpFn *accfn)
{
    /*
     * Pairwise long operations: widen both halves of the pair,
     * combine the pairs with the opfn, and then possibly accumulate
     * into the destination with the accfn.
     */
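    /*
     * Illustrative example (assuming the usual Neon semantics):
     * VPADDL.S8 Dd, Dm sign-extends each byte of Dm to 16 bits and
     * adds adjacent pairs, producing four 16-bit results in Dd.
     */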
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();
        tmp = neon_load_reg(a->vm, pass * 2);
        widenfn(rm0_64, tmp);
        tcg_temp_free_i32(tmp);
        tmp = neon_load_reg(a->vm, pass * 2 + 1);
        widenfn(rm1_64, tmp);
        tcg_temp_free_i32(tmp);
        opfn(rd_64, rm0_64, rm1_64);
        tcg_temp_free_i64(rm0_64);
        tcg_temp_free_i64(rm1_64);

            TCGv_i64 tmp64 = tcg_temp_new_i64();
            neon_load_reg64(tmp64, a->vd + pass);
            accfn(rd_64, tmp64, rd_64);
            tcg_temp_free_i64(tmp64);
        neon_store_reg64(rd_64, a->vd + pass);
        tcg_temp_free_i64(rd_64);

static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);

static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);

static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],

static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],

typedef void ZipFn(TCGv_ptr, TCGv_ptr);

static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    /* Bad size or size/q combination */
    if (!vfp_access_check(s)) {
    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(pm);

static bool trans_VUZP(DisasContext *s, arg_2misc *a)
    static ZipFn * const fn[2][4] = {
            gen_helper_neon_unzip8,
            gen_helper_neon_unzip16,
            gen_helper_neon_qunzip8,
            gen_helper_neon_qunzip16,
            gen_helper_neon_qunzip32,
    return do_zip_uzp(s, a, fn[a->q][a->size]);

static bool trans_VZIP(DisasContext *s, arg_2misc *a)
    static ZipFn * const fn[2][4] = {
            gen_helper_neon_zip8,
            gen_helper_neon_zip16,
            gen_helper_neon_qzip8,
            gen_helper_neon_qzip16,
            gen_helper_neon_qzip32,
    return do_zip_uzp(s, a, fn[a->q][a->size]);

static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenNarrowEnvFn *narrowfn)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    neon_load_reg64(rm, a->vm);
    narrowfn(rd0, cpu_env, rm);
    neon_load_reg64(rm, a->vm + 1);
    narrowfn(rd1, cpu_env, rm);
    neon_store_reg(a->vd, 0, rd0);
    neon_store_reg(a->vd, 1, rd1);
    tcg_temp_free_i64(rm);

#define DO_VMOVN(INSN, FUNC) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        static NeonGenNarrowEnvFn * const narrowfn[] = { \
        return do_vmovn(s, a, narrowfn[a->size]); \

DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)

static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    rd = tcg_temp_new_i64();

    rm0 = neon_load_reg(a->vm, 0);
    rm1 = neon_load_reg(a->vm, 1);
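    /*
     * Illustrative note: this is the 2-reg-misc form of VSHLL, whose
     * shift amount is fixed at the element width, so each widened half
     * is shifted left by 8 << a->size bits below.
     */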
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd + 1);

    tcg_temp_free_i64(rd);
    tcg_temp_free_i32(rm0);
    tcg_temp_free_i32(rm1);

static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vm & 1) || (a->size != 1)) {
    if (!vfp_access_check(s)) {
    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp = neon_load_reg(a->vm, 0);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = neon_load_reg(a->vm, 1);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    tcg_temp_free_i32(tmp);
    tmp = neon_load_reg(a->vm, 2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = neon_load_reg(a->vm, 3);
    neon_store_reg(a->vd, 0, tmp2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd & 1) || (a->size != 1)) {
    if (!vfp_access_check(s)) {
    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 0, tmp3);
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    neon_store_reg(a->vd, 1, tmp);
    tmp3 = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 2, tmp3);
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    neon_store_reg(a->vd, 3, tmp2);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

#define DO_2MISC_VEC(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        return do_2misc_vec(s, a, FN); \

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)

static bool trans_VMVN(DisasContext *s, arg_2misc *a)
    return do_2misc_vec(s, a, tcg_gen_gvec_not);

#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \

#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \

WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)

#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \
        return do_2misc_vec(s, a, gen_##INSN); \

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)

static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
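    /*
     * Illustrative note: the loop below loads one 32-bit chunk of Vm
     * per pass, applies fn to it and stores the result to Vd; for
     * VCLS.S8, for example, each pass handles four 8-bit lanes packed
     * into a single i32.
     */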
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        neon_store_reg(a->vd, pass, tmp);

static bool trans_VREV32(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
        tcg_gen_bswap32_i32,
    return do_2misc(s, a, fn[a->size]);

static bool trans_VREV16(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_rev16);

static bool trans_VCLS(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_cls_s8,
        gen_helper_neon_cls_s16,
        gen_helper_neon_cls_s32,
    return do_2misc(s, a, fn[a->size]);

static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
    tcg_gen_clzi_i32(rd, rm, 32);

static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_clz_u8,
        gen_helper_neon_clz_u16,
    return do_2misc(s, a, fn[a->size]);

static bool trans_VCNT(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_helper_neon_cnt_u8);

static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x7fff : 0x7fffffff,

static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
    } else if (a->size != MO_32) {
    return do_2misc_vec(s, a, gen_VABS_F);

static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x8000 : 0x80000000,

static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
    } else if (a->size != MO_32) {
    return do_2misc_vec(s, a, gen_VNEG_F);

static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_helper_recpe_u32);

static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_helper_rsqrte_u32);

#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \
        FUNC(d, cpu_env, m); \

WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)

static bool trans_VQABS(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
    return do_2misc(s, a, fn[a->size]);

static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
    return do_2misc(s, a, fn[a->size]);

#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
                           uint32_t oprsz, uint32_t maxsz) \
        static gen_helper_gvec_2_ptr * const fns[4] = { \
            NULL, HFUNC, SFUNC, NULL, \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \
        tcg_temp_free_ptr(fpst); \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        if (a->size == MO_16) { \
            if (!dc_isar_feature(aa32_fp16_arith, s)) { \
        } else if (a->size != MO_32) { \
        return do_2misc_vec(s, a, gen_##INSN); \

DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)

DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)

static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
    return trans_VRINTX_impl(s, a);

#define DO_VEC_RMODE(INSN, RMODE, OP) \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
                           uint32_t oprsz, uint32_t maxsz) \
        static gen_helper_gvec_2_ptr * const fns[4] = { \
            gen_helper_gvec_##OP##h, \
            gen_helper_gvec_##OP##s, \
        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \
                           arm_rmode_to_sf(RMODE), fns[vece]); \
        tcg_temp_free_ptr(fpst); \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \
        if (a->size == MO_16) { \
            if (!dc_isar_feature(aa32_fp16_arith, s)) { \
        } else if (a->size != MO_32) { \
        return do_2misc_vec(s, a, gen_##INSN); \

DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)

DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)

static bool trans_VSWP(DisasContext *s, arg_2misc *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    rm = tcg_temp_new_i64();
    rd = tcg_temp_new_i64();
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        neon_load_reg64(rm, a->vm + pass);
        neon_load_reg64(rd, a->vd + pass);
        neon_store_reg64(rm, a->vd + pass);
        neon_store_reg64(rd, a->vm + pass);
    tcg_temp_free_i64(rm);
    tcg_temp_free_i64(rd);

    return true;
}
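
/*
 * Illustrative worked example for the transpose helpers below: with
 * input bytes (least significant first) t0 = {b0, b1, b2, b3} and
 * t1 = {c0, c1, c2, c3}, gen_neon_trn_u8 leaves
 *   t0 = {c0, b0, c2, b2} and t1 = {c1, b1, c3, b3},
 * i.e. the even- and odd-numbered lanes interleaved, which is one step
 * of VTRN.8; gen_neon_trn_u16 does the same with 16-bit lanes.
 */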
static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 8);
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
    tcg_gen_or_i32(rd, rd, tmp);

    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);

static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 16);
    tcg_gen_andi_i32(tmp, t1, 0xffff);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shri_i32(t1, t1, 16);
    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);

static bool trans_VTRN(DisasContext *s, arg_2misc *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass + 1);
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass + 1, tmp);
        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass);
                gen_neon_trn_u8(tmp, tmp2);
                gen_neon_trn_u16(tmp, tmp2);
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass, tmp);