target/arm: Implement bfloat widening fma (indexed)
[qemu/ar7.git] / target/arm/translate-neon.c
blob 633fef3bf76d46b5efcc06297b00a573c64cda4f
1 /*
2 * ARM translation: AArch32 Neon instructions
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 * Copyright (c) 2020 Linaro, Ltd.
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include "qemu/osdep.h"
24 #include "tcg/tcg-op.h"
25 #include "tcg/tcg-op-gvec.h"
26 #include "exec/exec-all.h"
27 #include "exec/gen-icount.h"
28 #include "translate.h"
29 #include "translate-a32.h"
31 static inline int plus1(DisasContext *s, int x)
33 return x + 1;
36 static inline int rsub_64(DisasContext *s, int x)
38 return 64 - x;
41 static inline int rsub_32(DisasContext *s, int x)
43 return 32 - x;
45 static inline int rsub_16(DisasContext *s, int x)
47 return 16 - x;
49 static inline int rsub_8(DisasContext *s, int x)
51 return 8 - x;
54 static inline int neon_3same_fp_size(DisasContext *s, int x)
56 /* Convert 0==fp32, 1==fp16 into a MO_* value */
57 return MO_32 - x;
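/*
 * Editorial note (not in the original file): the rsub_* helpers are
 * decode !functions that recover a right-shift amount from an encoded
 * immediate holding "esize - shift".  A sketch, assuming a VSHR.S32
 * with shift 3: the 5-bit field encodes 32 - 3 = 29, and rsub_32 maps
 * it back as 32 - 29 = 3.  Likewise neon_3same_fp_size(s, 0) == MO_32
 * and neon_3same_fp_size(s, 1) == MO_16.
 */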
60 /* Include the generated Neon decoder */
61 #include "decode-neon-dp.c.inc"
62 #include "decode-neon-ls.c.inc"
63 #include "decode-neon-shared.c.inc"
65 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
67 TCGv_ptr ret = tcg_temp_new_ptr();
68 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
69 return ret;
72 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
74 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
76 switch (mop) {
77 case MO_UB:
78 tcg_gen_ld8u_i32(var, cpu_env, offset);
79 break;
80 case MO_UW:
81 tcg_gen_ld16u_i32(var, cpu_env, offset);
82 break;
83 case MO_UL:
84 tcg_gen_ld_i32(var, cpu_env, offset);
85 break;
86 default:
87 g_assert_not_reached();
91 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
93 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
95 switch (mop) {
96 case MO_UB:
97 tcg_gen_ld8u_i64(var, cpu_env, offset);
98 break;
99 case MO_UW:
100 tcg_gen_ld16u_i64(var, cpu_env, offset);
101 break;
102 case MO_UL:
103 tcg_gen_ld32u_i64(var, cpu_env, offset);
104 break;
105 case MO_Q:
106 tcg_gen_ld_i64(var, cpu_env, offset);
107 break;
108 default:
109 g_assert_not_reached();
113 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
115 long offset = neon_element_offset(reg, ele, size);
117 switch (size) {
118 case MO_8:
119 tcg_gen_st8_i32(var, cpu_env, offset);
120 break;
121 case MO_16:
122 tcg_gen_st16_i32(var, cpu_env, offset);
123 break;
124 case MO_32:
125 tcg_gen_st_i32(var, cpu_env, offset);
126 break;
127 default:
128 g_assert_not_reached();
132 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
134 long offset = neon_element_offset(reg, ele, size);
136 switch (size) {
137 case MO_8:
138 tcg_gen_st8_i64(var, cpu_env, offset);
139 break;
140 case MO_16:
141 tcg_gen_st16_i64(var, cpu_env, offset);
142 break;
143 case MO_32:
144 tcg_gen_st32_i64(var, cpu_env, offset);
145 break;
146 case MO_64:
147 tcg_gen_st_i64(var, cpu_env, offset);
148 break;
149 default:
150 g_assert_not_reached();
154 static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
155 int data, gen_helper_gvec_4 *fn_gvec)
157 /* UNDEF accesses to D16-D31 if they don't exist. */
158 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
159 return false;
163 * UNDEF accesses to odd registers for each set bit of Q.
164 * Q will be 0b111 for all-Q-reg instructions; other values
165 * indicate mixed Q- and D-reg inputs.
167 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
168 return false;
171 if (!vfp_access_check(s)) {
172 return true;
175 int opr_sz = q ? 16 : 8;
176 tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
177 vfp_reg_offset(1, vn),
178 vfp_reg_offset(1, vm),
179 vfp_reg_offset(1, vd),
180 opr_sz, opr_sz, data, fn_gvec);
181 return true;
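/*
 * Worked example (editorial): for an all-Q-reg insn the caller passes
 * q == 0b111, so any odd vd/vn/vm UNDEFs -- e.g. vd = 3 gives
 * ((3 & 1) * 4) & 7 != 0.  With q == 0b110 (the scalar forms below)
 * only vd and vn must name Q registers, so an odd vm is still accepted.
 */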
184 static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
185 int data, ARMFPStatusFlavour fp_flavour,
186 gen_helper_gvec_4_ptr *fn_gvec_ptr)
188 /* UNDEF accesses to D16-D31 if they don't exist. */
189 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
190 return false;
194 * UNDEF accesses to odd registers for each set bit of Q.
195 * Q will be 0b111 for all-Q-reg instructions; other values
196 * indicate mixed Q- and D-reg inputs.
198 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
199 return false;
202 if (!vfp_access_check(s)) {
203 return true;
206 int opr_sz = q ? 16 : 8;
207 TCGv_ptr fpst = fpstatus_ptr(fp_flavour);
209 tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
210 vfp_reg_offset(1, vn),
211 vfp_reg_offset(1, vm),
212 vfp_reg_offset(1, vd),
213 fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
214 tcg_temp_free_ptr(fpst);
215 return true;
218 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
220 if (!dc_isar_feature(aa32_vcma, s)) {
221 return false;
223 if (a->size == MO_16) {
224 if (!dc_isar_feature(aa32_fp16_arith, s)) {
225 return false;
227 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
228 FPST_STD_F16, gen_helper_gvec_fcmlah);
230 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
231 FPST_STD, gen_helper_gvec_fcmlas);
234 static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
236 int opr_sz;
237 TCGv_ptr fpst;
238 gen_helper_gvec_3_ptr *fn_gvec_ptr;
240 if (!dc_isar_feature(aa32_vcma, s)
241 || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
242 return false;
245 /* UNDEF accesses to D16-D31 if they don't exist. */
246 if (!dc_isar_feature(aa32_simd_r32, s) &&
247 ((a->vd | a->vn | a->vm) & 0x10)) {
248 return false;
251 if ((a->vn | a->vm | a->vd) & a->q) {
252 return false;
255 if (!vfp_access_check(s)) {
256 return true;
259 opr_sz = (1 + a->q) * 8;
260 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
261 fn_gvec_ptr = (a->size == MO_16) ?
262 gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
263 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
264 vfp_reg_offset(1, a->vn),
265 vfp_reg_offset(1, a->vm),
266 fpst, opr_sz, opr_sz, a->rot,
267 fn_gvec_ptr);
268 tcg_temp_free_ptr(fpst);
269 return true;
272 static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
274 if (!dc_isar_feature(aa32_dp, s)) {
275 return false;
277 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
278 gen_helper_gvec_sdot_b);
281 static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
283 if (!dc_isar_feature(aa32_dp, s)) {
284 return false;
286 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
287 gen_helper_gvec_udot_b);
290 static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
292 if (!dc_isar_feature(aa32_i8mm, s)) {
293 return false;
295 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
296 gen_helper_gvec_usdot_b);
299 static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
301 if (!dc_isar_feature(aa32_bf16, s)) {
302 return false;
304 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
305 gen_helper_gvec_bfdot);
308 static bool trans_VFML(DisasContext *s, arg_VFML *a)
310 int opr_sz;
312 if (!dc_isar_feature(aa32_fhm, s)) {
313 return false;
316 /* UNDEF accesses to D16-D31 if they don't exist. */
317 if (!dc_isar_feature(aa32_simd_r32, s) &&
318 (a->vd & 0x10)) {
319 return false;
322 if (a->vd & a->q) {
323 return false;
326 if (!vfp_access_check(s)) {
327 return true;
330 opr_sz = (1 + a->q) * 8;
331 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
332 vfp_reg_offset(a->q, a->vn),
333 vfp_reg_offset(a->q, a->vm),
334 cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
335 gen_helper_gvec_fmlal_a32);
336 return true;
339 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
341 int data = (a->index << 2) | a->rot;
343 if (!dc_isar_feature(aa32_vcma, s)) {
344 return false;
346 if (a->size == MO_16) {
347 if (!dc_isar_feature(aa32_fp16_arith, s)) {
348 return false;
350 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
351 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
353 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
354 FPST_STD, gen_helper_gvec_fcmlas_idx);
357 static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
359 if (!dc_isar_feature(aa32_dp, s)) {
360 return false;
362 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
363 gen_helper_gvec_sdot_idx_b);
366 static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
368 if (!dc_isar_feature(aa32_dp, s)) {
369 return false;
371 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
372 gen_helper_gvec_udot_idx_b);
375 static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
377 if (!dc_isar_feature(aa32_i8mm, s)) {
378 return false;
380 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
381 gen_helper_gvec_usdot_idx_b);
384 static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
386 if (!dc_isar_feature(aa32_i8mm, s)) {
387 return false;
389 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
390 gen_helper_gvec_sudot_idx_b);
393 static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
395 if (!dc_isar_feature(aa32_bf16, s)) {
396 return false;
398 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
399 gen_helper_gvec_bfdot_idx);
402 static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
404 int opr_sz;
406 if (!dc_isar_feature(aa32_fhm, s)) {
407 return false;
410 /* UNDEF accesses to D16-D31 if they don't exist. */
411 if (!dc_isar_feature(aa32_simd_r32, s) &&
412 ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
413 return false;
416 if (a->vd & a->q) {
417 return false;
420 if (!vfp_access_check(s)) {
421 return true;
424 opr_sz = (1 + a->q) * 8;
425 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
426 vfp_reg_offset(a->q, a->vn),
427 vfp_reg_offset(a->q, a->rm),
428 cpu_env, opr_sz, opr_sz,
429 (a->index << 2) | a->s, /* is_2 == 0 */
430 gen_helper_gvec_fmlal_idx_a32);
431 return true;
434 static struct {
435 int nregs;
436 int interleave;
437 int spacing;
438 } const neon_ls_element_type[11] = {
439 {1, 4, 1},
440 {1, 4, 2},
441 {4, 1, 1},
442 {2, 2, 2},
443 {1, 3, 1},
444 {1, 3, 2},
445 {3, 1, 1},
446 {1, 1, 1},
447 {1, 2, 1},
448 {1, 2, 2},
449 {2, 1, 1}
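/*
 * Editorial note: the table is indexed by the insn "itype" field and
 * each entry is {nregs, interleave, spacing}.  For instance (assuming
 * the usual A32 encoding), itype 7 is VLD1/VST1 with one register
 * {1, 1, 1}, itype 2 is VLD1 with four registers {4, 1, 1}, and
 * itype 0 is VLD4 {1, 4, 1}: one pass over four interleaved elements.
 */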
452 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
453 int stride)
455 if (rm != 15) {
456 TCGv_i32 base;
458 base = load_reg(s, rn);
459 if (rm == 13) {
460 tcg_gen_addi_i32(base, base, stride);
461 } else {
462 TCGv_i32 index;
463 index = load_reg(s, rm);
464 tcg_gen_add_i32(base, base, index);
465 tcg_temp_free_i32(index);
467 store_reg(s, rn, base);
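/*
 * Illustrative sketch (editorial): rm == 15 means no writeback,
 * rm == 13 selects post-increment by the transfer size (e.g.
 * VLD1.8 {d0-d3}, [r2]! adds 32 to r2), and any other rm adds that
 * register's value instead.
 */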
471 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
473 /* Neon load/store multiple structures */
474 int nregs, interleave, spacing, reg, n;
475 MemOp mop, align, endian;
476 int mmu_idx = get_mem_index(s);
477 int size = a->size;
478 TCGv_i64 tmp64;
479 TCGv_i32 addr, tmp;
481 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
482 return false;
485 /* UNDEF accesses to D16-D31 if they don't exist */
486 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
487 return false;
489 if (a->itype > 10) {
490 return false;
492 /* Catch UNDEF cases for bad values of align field */
493 switch (a->itype & 0xc) {
494 case 4:
495 if (a->align >= 2) {
496 return false;
498 break;
499 case 8:
500 if (a->align == 3) {
501 return false;
503 break;
504 default:
505 break;
507 nregs = neon_ls_element_type[a->itype].nregs;
508 interleave = neon_ls_element_type[a->itype].interleave;
509 spacing = neon_ls_element_type[a->itype].spacing;
510 if (size == 3 && (interleave | spacing) != 1) {
511 return false;
514 if (!vfp_access_check(s)) {
515 return true;
518 /* For our purposes, bytes are always little-endian. */
519 endian = s->be_data;
520 if (size == 0) {
521 endian = MO_LE;
524 /* Enforce alignment requested by the instruction */
525 if (a->align) {
526 align = pow2_align(a->align + 2); /* 4 ** a->align */
527 } else {
528 align = s->align_mem ? MO_ALIGN : 0;
532 * Consecutive little-endian elements from a single register
533 * can be promoted to a larger little-endian operation.
535 if (interleave == 1 && endian == MO_LE) {
536 /* Retain any natural alignment. */
537 if (align == MO_ALIGN) {
538 align = pow2_align(size);
540 size = 3;
543 tmp64 = tcg_temp_new_i64();
544 addr = tcg_temp_new_i32();
545 tmp = tcg_const_i32(1 << size);
546 load_reg_var(s, addr, a->rn);
548 mop = endian | size | align;
549 for (reg = 0; reg < nregs; reg++) {
550 for (n = 0; n < 8 >> size; n++) {
551 int xs;
552 for (xs = 0; xs < interleave; xs++) {
553 int tt = a->vd + reg + spacing * xs;
555 if (a->l) {
556 gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
557 neon_store_element64(tt, n, size, tmp64);
558 } else {
559 neon_load_element64(tmp64, tt, n, size);
560 gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
562 tcg_gen_add_i32(addr, addr, tmp);
564 /* Subsequent memory operations inherit alignment */
565 mop &= ~MO_AMASK;
569 tcg_temp_free_i32(addr);
570 tcg_temp_free_i32(tmp);
571 tcg_temp_free_i64(tmp64);
573 gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
574 return true;
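/*
 * Example of the little-endian promotion above (editorial note): for
 * VLD1.8 {d0}, [r0], interleave == 1 and size == 0 forces MO_LE, so
 * the eight byte elements are loaded as one 64-bit access (size
 * promoted to 3) rather than eight one-byte accesses.
 */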
577 static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
579 /* Neon load single structure to all lanes */
580 int reg, stride, vec_size;
581 int vd = a->vd;
582 int size = a->size;
583 int nregs = a->n + 1;
584 TCGv_i32 addr, tmp;
585 MemOp mop, align;
587 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
588 return false;
591 /* UNDEF accesses to D16-D31 if they don't exist */
592 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
593 return false;
596 align = 0;
597 if (size == 3) {
598 if (nregs != 4 || a->a == 0) {
599 return false;
601 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
602 size = MO_32;
603 align = MO_ALIGN_16;
604 } else if (a->a) {
605 switch (nregs) {
606 case 1:
607 if (size == 0) {
608 return false;
610 align = MO_ALIGN;
611 break;
612 case 2:
613 align = pow2_align(size + 1);
614 break;
615 case 3:
616 return false;
617 case 4:
618 align = pow2_align(size + 2);
619 break;
620 default:
621 g_assert_not_reached();
625 if (!vfp_access_check(s)) {
626 return true;
630 * VLD1 to all lanes: T bit indicates how many Dregs to write.
631 * VLD2/3/4 to all lanes: T bit indicates register stride.
633 stride = a->t ? 2 : 1;
634 vec_size = nregs == 1 ? stride * 8 : 8;
635 mop = size | align;
636 tmp = tcg_temp_new_i32();
637 addr = tcg_temp_new_i32();
638 load_reg_var(s, addr, a->rn);
639 for (reg = 0; reg < nregs; reg++) {
640 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
641 if ((vd & 1) && vec_size == 16) {
643 * We cannot write 16 bytes at once because the
644 * destination is unaligned.
646 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
647 8, 8, tmp);
648 tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
649 neon_full_reg_offset(vd), 8, 8);
650 } else {
651 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
652 vec_size, vec_size, tmp);
654 tcg_gen_addi_i32(addr, addr, 1 << size);
655 vd += stride;
657 /* Subsequent memory operations inherit alignment */
658 mop &= ~MO_AMASK;
660 tcg_temp_free_i32(tmp);
661 tcg_temp_free_i32(addr);
663 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
665 return true;
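/*
 * Editorial example: VLD1.32 {d3[], d4[]}, [r0] (T bit set, nregs == 1)
 * replicates one 32-bit element into two D registers.  Since d3 is an
 * odd register, the 16-byte write is split into two 8-byte dups as in
 * the unaligned-destination path above.
 */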
668 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
670 /* Neon load/store single structure to one lane */
671 int reg;
672 int nregs = a->n + 1;
673 int vd = a->vd;
674 TCGv_i32 addr, tmp;
675 MemOp mop;
677 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
678 return false;
681 /* UNDEF accesses to D16-D31 if they don't exist */
682 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
683 return false;
686 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
687 switch (nregs) {
688 case 1:
689 if (((a->align & (1 << a->size)) != 0) ||
690 (a->size == 2 && (a->align == 1 || a->align == 2))) {
691 return false;
693 break;
694 case 3:
695 if ((a->align & 1) != 0) {
696 return false;
698 /* fall through */
699 case 2:
700 if (a->size == 2 && (a->align & 2) != 0) {
701 return false;
703 break;
704 case 4:
705 if (a->size == 2 && a->align == 3) {
706 return false;
708 break;
709 default:
710 abort();
712 if ((vd + a->stride * (nregs - 1)) > 31) {
714 * Attempts to write off the end of the register file are
715 * UNPREDICTABLE; we choose to UNDEF because otherwise we would
716 * access off the end of the array that holds the register data.
718 return false;
721 if (!vfp_access_check(s)) {
722 return true;
725 /* Pick up SCTLR settings */
726 mop = finalize_memop(s, a->size);
728 if (a->align) {
729 MemOp align_op;
731 switch (nregs) {
732 case 1:
733 /* For VLD1, use natural alignment. */
734 align_op = MO_ALIGN;
735 break;
736 case 2:
737 /* For VLD2, use double alignment. */
738 align_op = pow2_align(a->size + 1);
739 break;
740 case 4:
741 if (a->size == MO_32) {
743 * For VLD4.32, align = 1 is double alignment, align = 2 is
744 * quad alignment; align = 3 is rejected above.
746 align_op = pow2_align(a->size + a->align);
747 } else {
748                 /* For VLD4.8 and VLD4.16, we want quad alignment. */
749 align_op = pow2_align(a->size + 2);
751 break;
752 default:
753 /* For VLD3, the alignment field is zero and rejected above. */
754 g_assert_not_reached();
757 mop = (mop & ~MO_AMASK) | align_op;
760 tmp = tcg_temp_new_i32();
761 addr = tcg_temp_new_i32();
762 load_reg_var(s, addr, a->rn);
764 for (reg = 0; reg < nregs; reg++) {
765 if (a->l) {
766 gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
767 neon_store_element(vd, a->reg_idx, a->size, tmp);
768 } else { /* Store */
769 neon_load_element(tmp, vd, a->reg_idx, a->size);
770 gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
772 vd += a->stride;
773 tcg_gen_addi_i32(addr, addr, 1 << a->size);
775 /* Subsequent memory operations inherit alignment */
776 mop &= ~MO_AMASK;
778 tcg_temp_free_i32(addr);
779 tcg_temp_free_i32(tmp);
781 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
783 return true;
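/*
 * Editorial example: VLD2.16 {d0[2], d1[2]}, [r0] issues two 16-bit
 * loads, writing lane 2 of d0 and d1; with the align bit set, the
 * first access demands 4-byte (double-width) alignment and later ones
 * drop it via mop &= ~MO_AMASK.
 */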
786 static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
788 int vec_size = a->q ? 16 : 8;
789 int rd_ofs = neon_full_reg_offset(a->vd);
790 int rn_ofs = neon_full_reg_offset(a->vn);
791 int rm_ofs = neon_full_reg_offset(a->vm);
793 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
794 return false;
797 /* UNDEF accesses to D16-D31 if they don't exist. */
798 if (!dc_isar_feature(aa32_simd_r32, s) &&
799 ((a->vd | a->vn | a->vm) & 0x10)) {
800 return false;
803 if ((a->vn | a->vm | a->vd) & a->q) {
804 return false;
807 if (!vfp_access_check(s)) {
808 return true;
811 fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
812 return true;
815 #define DO_3SAME(INSN, FUNC) \
816 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
818 return do_3same(s, a, FUNC); \
821 DO_3SAME(VADD, tcg_gen_gvec_add)
822 DO_3SAME(VSUB, tcg_gen_gvec_sub)
823 DO_3SAME(VAND, tcg_gen_gvec_and)
824 DO_3SAME(VBIC, tcg_gen_gvec_andc)
825 DO_3SAME(VORR, tcg_gen_gvec_or)
826 DO_3SAME(VORN, tcg_gen_gvec_orc)
827 DO_3SAME(VEOR, tcg_gen_gvec_xor)
828 DO_3SAME(VSHL_S, gen_gvec_sshl)
829 DO_3SAME(VSHL_U, gen_gvec_ushl)
830 DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
831 DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
832 DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
833 DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
835 /* These insns are all gvec_bitsel but with the inputs in various orders. */
836 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
837 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
838 uint32_t rn_ofs, uint32_t rm_ofs, \
839 uint32_t oprsz, uint32_t maxsz) \
841 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
843 DO_3SAME(INSN, gen_##INSN##_3s)
845 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
846 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
847 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
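/*
 * Editorial note: VBSL/VBIT/VBIF are the same bitwise select with the
 * operands in different roles.  E.g. DO_3SAME_BITSEL(VBSL, ...) makes
 * rd the selector, computing rd = (rn & rd) | (rm & ~rd).
 */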
849 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \
850 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
852 if (a->size == 3) { \
853 return false; \
855 return do_3same(s, a, FUNC); \
858 DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
859 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
860 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
861 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
862 DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
863 DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
864 DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
865 DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
866 DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
867 DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
868 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
869 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
871 #define DO_3SAME_CMP(INSN, COND) \
872 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
873 uint32_t rn_ofs, uint32_t rm_ofs, \
874 uint32_t oprsz, uint32_t maxsz) \
876 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
878 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
880 DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
881 DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
882 DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
883 DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
884 DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
886 #define WRAP_OOL_FN(WRAPNAME, FUNC) \
887 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
888 uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
890 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
893 WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
895 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
897 if (a->size != 0) {
898 return false;
900 return do_3same(s, a, gen_VMUL_p_3s);
903 #define DO_VQRDMLAH(INSN, FUNC) \
904 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
906 if (!dc_isar_feature(aa32_rdm, s)) { \
907 return false; \
909 if (a->size != 1 && a->size != 2) { \
910 return false; \
912 return do_3same(s, a, FUNC); \
915 DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
916 DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
918 #define DO_SHA1(NAME, FUNC) \
919 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
920 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
922 if (!dc_isar_feature(aa32_sha1, s)) { \
923 return false; \
925 return do_3same(s, a, gen_##NAME##_3s); \
928 DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
929 DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
930 DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
931 DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
933 #define DO_SHA2(NAME, FUNC) \
934 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
935 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
937 if (!dc_isar_feature(aa32_sha2, s)) { \
938 return false; \
940 return do_3same(s, a, gen_##NAME##_3s); \
943 DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
944 DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
945 DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
947 #define DO_3SAME_64(INSN, FUNC) \
948 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
949 uint32_t rn_ofs, uint32_t rm_ofs, \
950 uint32_t oprsz, uint32_t maxsz) \
952 static const GVecGen3 op = { .fni8 = FUNC }; \
953 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
955 DO_3SAME(INSN, gen_##INSN##_3s)
957 #define DO_3SAME_64_ENV(INSN, FUNC) \
958 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
960 FUNC(d, cpu_env, n, m); \
962 DO_3SAME_64(INSN, gen_##INSN##_elt)
964 DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
965 DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
966 DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
967 DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
968 DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
969 DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
971 #define DO_3SAME_32(INSN, FUNC) \
972 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
973 uint32_t rn_ofs, uint32_t rm_ofs, \
974 uint32_t oprsz, uint32_t maxsz) \
976 static const GVecGen3 ops[4] = { \
977 { .fni4 = gen_helper_neon_##FUNC##8 }, \
978 { .fni4 = gen_helper_neon_##FUNC##16 }, \
979 { .fni4 = gen_helper_neon_##FUNC##32 }, \
980 { 0 }, \
981 }; \
982 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
984 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
986 if (a->size > 2) { \
987 return false; \
989 return do_3same(s, a, gen_##INSN##_3s); \
993 * Some helper functions need to be passed the cpu_env. In order
994 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
995 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
996 * and which call a NeonGenTwoOpEnvFn().
998 #define WRAP_ENV_FN(WRAPNAME, FUNC) \
999 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
1001 FUNC(d, cpu_env, n, m); \
1004 #define DO_3SAME_32_ENV(INSN, FUNC) \
1005 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
1006 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
1007 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
1008 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
1009 uint32_t rn_ofs, uint32_t rm_ofs, \
1010 uint32_t oprsz, uint32_t maxsz) \
1012 static const GVecGen3 ops[4] = { \
1013 { .fni4 = gen_##INSN##_tramp8 }, \
1014 { .fni4 = gen_##INSN##_tramp16 }, \
1015 { .fni4 = gen_##INSN##_tramp32 }, \
1016 { 0 }, \
1017 }; \
1018 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
1020 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
1022 if (a->size > 2) { \
1023 return false; \
1025 return do_3same(s, a, gen_##INSN##_3s); \
1028 DO_3SAME_32(VHADD_S, hadd_s)
1029 DO_3SAME_32(VHADD_U, hadd_u)
1030 DO_3SAME_32(VHSUB_S, hsub_s)
1031 DO_3SAME_32(VHSUB_U, hsub_u)
1032 DO_3SAME_32(VRHADD_S, rhadd_s)
1033 DO_3SAME_32(VRHADD_U, rhadd_u)
1034 DO_3SAME_32(VRSHL_S, rshl_s)
1035 DO_3SAME_32(VRSHL_U, rshl_u)
1037 DO_3SAME_32_ENV(VQSHL_S, qshl_s)
1038 DO_3SAME_32_ENV(VQSHL_U, qshl_u)
1039 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
1040 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
1042 static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
1044 /* Operations handled pairwise 32 bits at a time */
1045 TCGv_i32 tmp, tmp2, tmp3;
1047 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1048 return false;
1051 /* UNDEF accesses to D16-D31 if they don't exist. */
1052 if (!dc_isar_feature(aa32_simd_r32, s) &&
1053 ((a->vd | a->vn | a->vm) & 0x10)) {
1054 return false;
1057 if (a->size == 3) {
1058 return false;
1061 if (!vfp_access_check(s)) {
1062 return true;
1065 assert(a->q == 0); /* enforced by decode patterns */
1068 * Note that we have to be careful not to clobber the source operands
1069 * in the "vm == vd" case by storing the result of the first pass too
1070 * early. Since Q is 0 there are always just two passes, so instead
1071 * of a complicated loop over each pass we just unroll.
1073 tmp = tcg_temp_new_i32();
1074 tmp2 = tcg_temp_new_i32();
1075 tmp3 = tcg_temp_new_i32();
1077 read_neon_element32(tmp, a->vn, 0, MO_32);
1078 read_neon_element32(tmp2, a->vn, 1, MO_32);
1079 fn(tmp, tmp, tmp2);
1081 read_neon_element32(tmp3, a->vm, 0, MO_32);
1082 read_neon_element32(tmp2, a->vm, 1, MO_32);
1083 fn(tmp3, tmp3, tmp2);
1085 write_neon_element32(tmp, a->vd, 0, MO_32);
1086 write_neon_element32(tmp3, a->vd, 1, MO_32);
1088 tcg_temp_free_i32(tmp);
1089 tcg_temp_free_i32(tmp2);
1090 tcg_temp_free_i32(tmp3);
1091 return true;
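/*
 * Worked example (editorial): VPADD.I32 d0, d1, d2 computes
 * d0[0] = d1[0] + d1[1] and d0[1] = d2[0] + d2[1]; the unrolled passes
 * above keep both results in temps, so d0 == d2 is handled safely.
 */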
1094 #define DO_3SAME_PAIR(INSN, func) \
1095 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
1097 static NeonGenTwoOpFn * const fns[] = { \
1098 gen_helper_neon_##func##8, \
1099 gen_helper_neon_##func##16, \
1100 gen_helper_neon_##func##32, \
1101 }; \
1102 if (a->size > 2) { \
1103 return false; \
1105 return do_3same_pair(s, a, fns[a->size]); \
1108 /* 32-bit pairwise ops end up the same as the elementwise versions. */
1109 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
1110 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
1111 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
1112 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
1113 #define gen_helper_neon_padd_u32 tcg_gen_add_i32
1115 DO_3SAME_PAIR(VPMAX_S, pmax_s)
1116 DO_3SAME_PAIR(VPMIN_S, pmin_s)
1117 DO_3SAME_PAIR(VPMAX_U, pmax_u)
1118 DO_3SAME_PAIR(VPMIN_U, pmin_u)
1119 DO_3SAME_PAIR(VPADD, padd_u)
1121 #define DO_3SAME_VQDMULH(INSN, FUNC) \
1122 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
1123 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
1124 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
1125 uint32_t rn_ofs, uint32_t rm_ofs, \
1126 uint32_t oprsz, uint32_t maxsz) \
1128 static const GVecGen3 ops[2] = { \
1129 { .fni4 = gen_##INSN##_tramp16 }, \
1130 { .fni4 = gen_##INSN##_tramp32 }, \
1131 }; \
1132 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
1134 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
1136 if (a->size != 1 && a->size != 2) { \
1137 return false; \
1139 return do_3same(s, a, gen_##INSN##_3s); \
1142 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
1143 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1145 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \
1146 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
1147 uint32_t rn_ofs, uint32_t rm_ofs, \
1148 uint32_t oprsz, uint32_t maxsz) \
1150 TCGv_ptr fpst = fpstatus_ptr(FPST); \
1151 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
1152 oprsz, maxsz, 0, FUNC); \
1153 tcg_temp_free_ptr(fpst); \
1156 #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \
1157 WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \
1158 WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
1159 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1161 if (a->size == MO_16) { \
1162 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1163 return false; \
1165 return do_3same(s, a, gen_##INSN##_fp16_3s); \
1167 return do_3same(s, a, gen_##INSN##_fp32_3s); \
1171 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
1172 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
1173 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
1174 DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
1175 DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
1176 DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
1177 DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
1178 DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
1179 DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
1180 DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
1181 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
1182 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
1183 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
1184 DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
1185 DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
1186 DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
1187 DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
1189 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
1190 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
1191 WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
1192 WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
1194 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
1196 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1197 return false;
1200 if (a->size == MO_16) {
1201 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1202 return false;
1204 return do_3same(s, a, gen_VMAXNM_fp16_3s);
1206 return do_3same(s, a, gen_VMAXNM_fp32_3s);
1209 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
1211 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1212 return false;
1215 if (a->size == MO_16) {
1216 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1217 return false;
1219 return do_3same(s, a, gen_VMINNM_fp16_3s);
1221 return do_3same(s, a, gen_VMINNM_fp32_3s);
1224 static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
1225 gen_helper_gvec_3_ptr *fn)
1227 /* FP pairwise operations */
1228 TCGv_ptr fpstatus;
1230 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1231 return false;
1234 /* UNDEF accesses to D16-D31 if they don't exist. */
1235 if (!dc_isar_feature(aa32_simd_r32, s) &&
1236 ((a->vd | a->vn | a->vm) & 0x10)) {
1237 return false;
1240 if (!vfp_access_check(s)) {
1241 return true;
1244 assert(a->q == 0); /* enforced by decode patterns */
1247 fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1248 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
1249 vfp_reg_offset(1, a->vn),
1250 vfp_reg_offset(1, a->vm),
1251 fpstatus, 8, 8, 0, fn);
1252 tcg_temp_free_ptr(fpstatus);
1254 return true;
1258  * For all the functions using this macro, size == 1 means fp16;
1259  * the fp16 case is gated on the fp16 arithmetic extension below.
1261 #define DO_3S_FP_PAIR(INSN,FUNC) \
1262 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1264 if (a->size == MO_16) { \
1265 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1266 return false; \
1268 return do_3same_fp_pair(s, a, FUNC##h); \
1270 return do_3same_fp_pair(s, a, FUNC##s); \
1273 DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
1274 DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
1275 DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
1277 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
1279 /* Handle a 2-reg-shift insn which can be vectorized. */
1280 int vec_size = a->q ? 16 : 8;
1281 int rd_ofs = neon_full_reg_offset(a->vd);
1282 int rm_ofs = neon_full_reg_offset(a->vm);
1284 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1285 return false;
1288 /* UNDEF accesses to D16-D31 if they don't exist. */
1289 if (!dc_isar_feature(aa32_simd_r32, s) &&
1290 ((a->vd | a->vm) & 0x10)) {
1291 return false;
1294 if ((a->vm | a->vd) & a->q) {
1295 return false;
1298 if (!vfp_access_check(s)) {
1299 return true;
1302 fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
1303 return true;
1306 #define DO_2SH(INSN, FUNC) \
1307 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1309 return do_vector_2sh(s, a, FUNC); \
1312 DO_2SH(VSHL, tcg_gen_gvec_shli)
1313 DO_2SH(VSLI, gen_gvec_sli)
1314 DO_2SH(VSRI, gen_gvec_sri)
1315 DO_2SH(VSRA_S, gen_gvec_ssra)
1316 DO_2SH(VSRA_U, gen_gvec_usra)
1317 DO_2SH(VRSHR_S, gen_gvec_srshr)
1318 DO_2SH(VRSHR_U, gen_gvec_urshr)
1319 DO_2SH(VRSRA_S, gen_gvec_srsra)
1320 DO_2SH(VRSRA_U, gen_gvec_ursra)
1322 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1324 /* Signed shift out of range results in all-sign-bits */
1325 a->shift = MIN(a->shift, (8 << a->size) - 1);
1326 return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1329 static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
1330 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1332 tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
1335 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1337 /* Shift out of range is architecturally valid and results in zero. */
1338 if (a->shift >= (8 << a->size)) {
1339 return do_vector_2sh(s, a, gen_zero_rd_2sh);
1340 } else {
1341 return do_vector_2sh(s, a, tcg_gen_gvec_shri);
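/*
 * Editorial note on the out-of-range cases: VSHR.S8 #8 clamps to a
 * shift of 7, so every lane becomes copies of its sign bit, while
 * VSHR.U8 #8 simply writes zeroes via gen_zero_rd_2sh.
 */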
1345 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
1346 NeonGenTwo64OpEnvFn *fn)
1349 * 2-reg-and-shift operations, size == 3 case, where the
1350 * function needs to be passed cpu_env.
1352 TCGv_i64 constimm;
1353 int pass;
1355 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1356 return false;
1359 /* UNDEF accesses to D16-D31 if they don't exist. */
1360 if (!dc_isar_feature(aa32_simd_r32, s) &&
1361 ((a->vd | a->vm) & 0x10)) {
1362 return false;
1365 if ((a->vm | a->vd) & a->q) {
1366 return false;
1369 if (!vfp_access_check(s)) {
1370 return true;
1374 * To avoid excessive duplication of ops we implement shift
1375 * by immediate using the variable shift operations.
1377 constimm = tcg_const_i64(dup_const(a->size, a->shift));
1379 for (pass = 0; pass < a->q + 1; pass++) {
1380 TCGv_i64 tmp = tcg_temp_new_i64();
1382 read_neon_element64(tmp, a->vm, pass, MO_64);
1383 fn(tmp, cpu_env, tmp, constimm);
1384 write_neon_element64(tmp, a->vd, pass, MO_64);
1385 tcg_temp_free_i64(tmp);
1387 tcg_temp_free_i64(constimm);
1388 return true;
1391 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
1392 NeonGenTwoOpEnvFn *fn)
1395 * 2-reg-and-shift operations, size < 3 case, where the
1396 * helper needs to be passed cpu_env.
1398 TCGv_i32 constimm, tmp;
1399 int pass;
1401 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1402 return false;
1405 /* UNDEF accesses to D16-D31 if they don't exist. */
1406 if (!dc_isar_feature(aa32_simd_r32, s) &&
1407 ((a->vd | a->vm) & 0x10)) {
1408 return false;
1411 if ((a->vm | a->vd) & a->q) {
1412 return false;
1415 if (!vfp_access_check(s)) {
1416 return true;
1420 * To avoid excessive duplication of ops we implement shift
1421 * by immediate using the variable shift operations.
1423 constimm = tcg_const_i32(dup_const(a->size, a->shift));
1424 tmp = tcg_temp_new_i32();
1426 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
1427 read_neon_element32(tmp, a->vm, pass, MO_32);
1428 fn(tmp, cpu_env, tmp, constimm);
1429 write_neon_element32(tmp, a->vd, pass, MO_32);
1431 tcg_temp_free_i32(tmp);
1432 tcg_temp_free_i32(constimm);
1433 return true;
1436 #define DO_2SHIFT_ENV(INSN, FUNC) \
1437 static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
1439 return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
1441 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1443 static NeonGenTwoOpEnvFn * const fns[] = { \
1444 gen_helper_neon_##FUNC##8, \
1445 gen_helper_neon_##FUNC##16, \
1446 gen_helper_neon_##FUNC##32, \
1447 }; \
1448 assert(a->size < ARRAY_SIZE(fns)); \
1449 return do_2shift_env_32(s, a, fns[a->size]); \
1452 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
1453 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
1454 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1456 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
1457 NeonGenTwo64OpFn *shiftfn,
1458 NeonGenNarrowEnvFn *narrowfn)
1460 /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
1461 TCGv_i64 constimm, rm1, rm2;
1462 TCGv_i32 rd;
1464 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1465 return false;
1468 /* UNDEF accesses to D16-D31 if they don't exist. */
1469 if (!dc_isar_feature(aa32_simd_r32, s) &&
1470 ((a->vd | a->vm) & 0x10)) {
1471 return false;
1474 if (a->vm & 1) {
1475 return false;
1478 if (!vfp_access_check(s)) {
1479 return true;
1483 * This is always a right shift, and the shiftfn is always a
1484 * left-shift helper, which thus needs the negated shift count.
1486 constimm = tcg_const_i64(-a->shift);
1487 rm1 = tcg_temp_new_i64();
1488 rm2 = tcg_temp_new_i64();
1489 rd = tcg_temp_new_i32();
1491 /* Load both inputs first to avoid potential overwrite if rm == rd */
1492 read_neon_element64(rm1, a->vm, 0, MO_64);
1493 read_neon_element64(rm2, a->vm, 1, MO_64);
1495 shiftfn(rm1, rm1, constimm);
1496 narrowfn(rd, cpu_env, rm1);
1497 write_neon_element32(rd, a->vd, 0, MO_32);
1499 shiftfn(rm2, rm2, constimm);
1500 narrowfn(rd, cpu_env, rm2);
1501 write_neon_element32(rd, a->vd, 1, MO_32);
1503 tcg_temp_free_i32(rd);
1504 tcg_temp_free_i64(rm1);
1505 tcg_temp_free_i64(rm2);
1506 tcg_temp_free_i64(constimm);
1508 return true;
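/*
 * Editorial example: VSHRN.I64 d0, q1, #8 negates the count, so
 * gen_ushl_i64(rm, rm, -8) performs the logical right shift by 8
 * before each 64->32 narrowing step above.
 */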
1511 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
1512 NeonGenTwoOpFn *shiftfn,
1513 NeonGenNarrowEnvFn *narrowfn)
1515 /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
1516 TCGv_i32 constimm, rm1, rm2, rm3, rm4;
1517 TCGv_i64 rtmp;
1518 uint32_t imm;
1520 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1521 return false;
1524 /* UNDEF accesses to D16-D31 if they don't exist. */
1525 if (!dc_isar_feature(aa32_simd_r32, s) &&
1526 ((a->vd | a->vm) & 0x10)) {
1527 return false;
1530 if (a->vm & 1) {
1531 return false;
1534 if (!vfp_access_check(s)) {
1535 return true;
1539 * This is always a right shift, and the shiftfn is always a
1540 * left-shift helper, which thus needs the negated shift count
1541 * duplicated into each lane of the immediate value.
1543 if (a->size == 1) {
1544 imm = (uint16_t)(-a->shift);
1545 imm |= imm << 16;
1546 } else {
1547 /* size == 2 */
1548 imm = -a->shift;
1550 constimm = tcg_const_i32(imm);
1552 /* Load all inputs first to avoid potential overwrite */
1553 rm1 = tcg_temp_new_i32();
1554 rm2 = tcg_temp_new_i32();
1555 rm3 = tcg_temp_new_i32();
1556 rm4 = tcg_temp_new_i32();
1557 read_neon_element32(rm1, a->vm, 0, MO_32);
1558 read_neon_element32(rm2, a->vm, 1, MO_32);
1559 read_neon_element32(rm3, a->vm, 2, MO_32);
1560 read_neon_element32(rm4, a->vm, 3, MO_32);
1561 rtmp = tcg_temp_new_i64();
1563 shiftfn(rm1, rm1, constimm);
1564 shiftfn(rm2, rm2, constimm);
1566 tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
1567 tcg_temp_free_i32(rm2);
1569 narrowfn(rm1, cpu_env, rtmp);
1570 write_neon_element32(rm1, a->vd, 0, MO_32);
1571 tcg_temp_free_i32(rm1);
1573 shiftfn(rm3, rm3, constimm);
1574 shiftfn(rm4, rm4, constimm);
1575 tcg_temp_free_i32(constimm);
1577 tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
1578 tcg_temp_free_i32(rm4);
1580 narrowfn(rm3, cpu_env, rtmp);
1581 tcg_temp_free_i64(rtmp);
1582 write_neon_element32(rm3, a->vd, 1, MO_32);
1583 tcg_temp_free_i32(rm3);
1584 return true;
1587 #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
1588 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1590 return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
1592 #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
1593 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1595 return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
1598 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1600 tcg_gen_extrl_i64_i32(dest, src);
1603 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1605 gen_helper_neon_narrow_u16(dest, src);
1608 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1610 gen_helper_neon_narrow_u8(dest, src);
1613 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
1614 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
1615 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
1617 DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
1618 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
1619 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
1621 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
1622 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
1623 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
1625 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
1626 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
1627 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
1628 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
1629 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
1630 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
1632 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
1633 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
1634 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
1636 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
1637 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
1638 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
1640 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
1641 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
1642 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1644 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
1645 NeonGenWidenFn *widenfn, bool u)
1647 TCGv_i64 tmp;
1648 TCGv_i32 rm0, rm1;
1649 uint64_t widen_mask = 0;
1651 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1652 return false;
1655 /* UNDEF accesses to D16-D31 if they don't exist. */
1656 if (!dc_isar_feature(aa32_simd_r32, s) &&
1657 ((a->vd | a->vm) & 0x10)) {
1658 return false;
1661 if (a->vd & 1) {
1662 return false;
1665 if (!vfp_access_check(s)) {
1666 return true;
1670 * This is a widen-and-shift operation. The shift is always less
1671 * than the width of the source type, so after widening the input
1672 * vector we can simply shift the whole 64-bit widened register,
1673 * and then clear the potential overflow bits resulting from left
1674 * bits of the narrow input appearing as right bits of the left
1675 * neighbour narrow input. Calculate a mask of bits to clear.
1677 if ((a->shift != 0) && (a->size < 2 || u)) {
1678 int esize = 8 << a->size;
1679 widen_mask = MAKE_64BIT_MASK(0, esize);
1680 widen_mask >>= esize - a->shift;
1681 widen_mask = dup_const(a->size + 1, widen_mask);
1684 rm0 = tcg_temp_new_i32();
1685 rm1 = tcg_temp_new_i32();
1686 read_neon_element32(rm0, a->vm, 0, MO_32);
1687 read_neon_element32(rm1, a->vm, 1, MO_32);
1688 tmp = tcg_temp_new_i64();
1690 widenfn(tmp, rm0);
1691 tcg_temp_free_i32(rm0);
1692 if (a->shift != 0) {
1693 tcg_gen_shli_i64(tmp, tmp, a->shift);
1694 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1696 write_neon_element64(tmp, a->vd, 0, MO_64);
1698 widenfn(tmp, rm1);
1699 tcg_temp_free_i32(rm1);
1700 if (a->shift != 0) {
1701 tcg_gen_shli_i64(tmp, tmp, a->shift);
1702 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1704 write_neon_element64(tmp, a->vd, 1, MO_64);
1705 tcg_temp_free_i64(tmp);
1706 return true;
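/*
 * Worked example (editorial): VSHLL.S8 q0, d1, #3 widens eight s8
 * lanes to s16 and shifts the whole 64-bit value left by 3; the top
 * three sign-extension bits of each lane spill into its neighbour's
 * low bits, so widen_mask = dup_const(MO_16, 0x07) and the spill is
 * cleared with tcg_gen_andi_i64(tmp, tmp, ~widen_mask).
 */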
1709 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
1711 static NeonGenWidenFn * const widenfn[] = {
1712 gen_helper_neon_widen_s8,
1713 gen_helper_neon_widen_s16,
1714 tcg_gen_ext_i32_i64,
1716 return do_vshll_2sh(s, a, widenfn[a->size], false);
1719 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
1721 static NeonGenWidenFn * const widenfn[] = {
1722 gen_helper_neon_widen_u8,
1723 gen_helper_neon_widen_u16,
1724 tcg_gen_extu_i32_i64,
1726 return do_vshll_2sh(s, a, widenfn[a->size], true);
1729 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
1730 gen_helper_gvec_2_ptr *fn)
1732 /* FP operations in 2-reg-and-shift group */
1733 int vec_size = a->q ? 16 : 8;
1734 int rd_ofs = neon_full_reg_offset(a->vd);
1735 int rm_ofs = neon_full_reg_offset(a->vm);
1736 TCGv_ptr fpst;
1738 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1739 return false;
1742 if (a->size == MO_16) {
1743 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1744 return false;
1748 /* UNDEF accesses to D16-D31 if they don't exist. */
1749 if (!dc_isar_feature(aa32_simd_r32, s) &&
1750 ((a->vd | a->vm) & 0x10)) {
1751 return false;
1754 if ((a->vm | a->vd) & a->q) {
1755 return false;
1758 if (!vfp_access_check(s)) {
1759 return true;
1762 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1763 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
1764 tcg_temp_free_ptr(fpst);
1765 return true;
1768 #define DO_FP_2SH(INSN, FUNC) \
1769 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1771 return do_fp_2sh(s, a, FUNC); \
1774 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
1775 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
1776 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
1777 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
1779 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
1780 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
1781 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
1782 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1784 static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
1787 * Expand the encoded constant.
1788 * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
1789 * We choose to not special-case this and will behave as if a
1790 * valid constant encoding of 0 had been given.
1791 * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
1793 switch (cmode) {
1794 case 0: case 1:
1795 /* no-op */
1796 break;
1797 case 2: case 3:
1798 imm <<= 8;
1799 break;
1800 case 4: case 5:
1801 imm <<= 16;
1802 break;
1803 case 6: case 7:
1804 imm <<= 24;
1805 break;
1806 case 8: case 9:
1807 imm |= imm << 16;
1808 break;
1809 case 10: case 11:
1810 imm = (imm << 8) | (imm << 24);
1811 break;
1812 case 12:
1813 imm = (imm << 8) | 0xff;
1814 break;
1815 case 13:
1816 imm = (imm << 16) | 0xffff;
1817 break;
1818 case 14:
1819 if (op) {
1821 * This is the only case where the top and bottom 32 bits
1822 * of the encoded constant differ.
1824 uint64_t imm64 = 0;
1825 int n;
1827 for (n = 0; n < 8; n++) {
1828 if (imm & (1 << n)) {
1829 imm64 |= (0xffULL << (n * 8));
1832 return imm64;
1834 imm |= (imm << 8) | (imm << 16) | (imm << 24);
1835 break;
1836 case 15:
1837 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
1838 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
1839 break;
1841 if (op) {
1842 imm = ~imm;
1844 return dup_const(MO_32, imm);
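/*
 * Editorial examples of the expansion above: cmode 12 with imm 0xab
 * gives 0x0000abff replicated into both 32-bit halves; cmode 14 with
 * op set expands imm 0x81 bytewise to 0xff000000000000ff (bits 0 and 7
 * each select a 0xff byte); and op == 1 inverts the other 32-bit forms
 * before replication.
 */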
1847 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
1848 GVecGen2iFn *fn)
1850 uint64_t imm;
1851 int reg_ofs, vec_size;
1853 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1854 return false;
1857 /* UNDEF accesses to D16-D31 if they don't exist. */
1858 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1859 return false;
1862 if (a->vd & a->q) {
1863 return false;
1866 if (!vfp_access_check(s)) {
1867 return true;
1870 reg_ofs = neon_full_reg_offset(a->vd);
1871 vec_size = a->q ? 16 : 8;
1872 imm = asimd_imm_const(a->imm, a->cmode, a->op);
1874 fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
1875 return true;
1878 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
1879 int64_t c, uint32_t oprsz, uint32_t maxsz)
1881 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
1884 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1886 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1887 GVecGen2iFn *fn;
1889 if ((a->cmode & 1) && a->cmode < 12) {
1890 /* for op=1, the imm will be inverted, so BIC becomes AND. */
1891 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1892 } else {
1893 /* There is one unallocated cmode/op combination in this space */
1894 if (a->cmode == 15 && a->op == 1) {
1895 return false;
1897 fn = gen_VMOV_1r;
1899 return do_1reg_imm(s, a, fn);
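/*
 * Editorial sketch: odd cmode values below 12 with op == 0 are VORR
 * (tcg_gen_gvec_ori); the same cmodes with op == 1 are VBIC, done as
 * AND because asimd_imm_const has already inverted the immediate.
 * Everything else is a plain VMOV/VMVN materialised by gen_VMOV_1r.
 */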
1902 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
1903 NeonGenWidenFn *widenfn,
1904 NeonGenTwo64OpFn *opfn,
1905 int src1_mop, int src2_mop)
1907     /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
1908 TCGv_i64 rn0_64, rn1_64, rm_64;
1910 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1911 return false;
1914 /* UNDEF accesses to D16-D31 if they don't exist. */
1915 if (!dc_isar_feature(aa32_simd_r32, s) &&
1916 ((a->vd | a->vn | a->vm) & 0x10)) {
1917 return false;
1920 if (!opfn) {
1921 /* size == 3 case, which is an entirely different insn group */
1922 return false;
1925 if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
1926 return false;
1929 if (!vfp_access_check(s)) {
1930 return true;
1933 rn0_64 = tcg_temp_new_i64();
1934 rn1_64 = tcg_temp_new_i64();
1935 rm_64 = tcg_temp_new_i64();
1937 if (src1_mop >= 0) {
1938 read_neon_element64(rn0_64, a->vn, 0, src1_mop);
1939 } else {
1940 TCGv_i32 tmp = tcg_temp_new_i32();
1941 read_neon_element32(tmp, a->vn, 0, MO_32);
1942 widenfn(rn0_64, tmp);
1943 tcg_temp_free_i32(tmp);
1945 if (src2_mop >= 0) {
1946 read_neon_element64(rm_64, a->vm, 0, src2_mop);
1947 } else {
1948 TCGv_i32 tmp = tcg_temp_new_i32();
1949 read_neon_element32(tmp, a->vm, 0, MO_32);
1950 widenfn(rm_64, tmp);
1951 tcg_temp_free_i32(tmp);
1954 opfn(rn0_64, rn0_64, rm_64);
1957 * Load second pass inputs before storing the first pass result, to
1958 * avoid incorrect results if a narrow input overlaps with the result.
1960 if (src1_mop >= 0) {
1961 read_neon_element64(rn1_64, a->vn, 1, src1_mop);
1962 } else {
1963 TCGv_i32 tmp = tcg_temp_new_i32();
1964 read_neon_element32(tmp, a->vn, 1, MO_32);
1965 widenfn(rn1_64, tmp);
1966 tcg_temp_free_i32(tmp);
1968 if (src2_mop >= 0) {
1969 read_neon_element64(rm_64, a->vm, 1, src2_mop);
1970 } else {
1971 TCGv_i32 tmp = tcg_temp_new_i32();
1972 read_neon_element32(tmp, a->vm, 1, MO_32);
1973 widenfn(rm_64, tmp);
1974 tcg_temp_free_i32(tmp);
1977 write_neon_element64(rn0_64, a->vd, 0, MO_64);
1979 opfn(rn1_64, rn1_64, rm_64);
1980 write_neon_element64(rn1_64, a->vd, 1, MO_64);
1982 tcg_temp_free_i64(rn0_64);
1983 tcg_temp_free_i64(rn1_64);
1984 tcg_temp_free_i64(rm_64);
1986 return true;
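/*
 * Editorial example: VADDW.S16 q0, q1, d2 passes src1_mop == MO_Q (the
 * first operand is already wide) and widens only d2's elements with
 * gen_helper_neon_widen_s16, whereas VADDL.S16 widens both inputs.
 */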
1989 #define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
1990 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
1992 static NeonGenWidenFn * const widenfn[] = { \
1993 gen_helper_neon_widen_##S##8, \
1994 gen_helper_neon_widen_##S##16, \
1995 NULL, NULL, \
1996 }; \
1997 static NeonGenTwo64OpFn * const addfn[] = { \
1998 gen_helper_neon_##OP##l_u16, \
1999 gen_helper_neon_##OP##l_u32, \
2000 tcg_gen_##OP##_i64, \
2001 NULL, \
2002 }; \
2003 int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
2004 return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
2005 SRC1WIDE ? MO_Q : narrow_mop, \
2006 narrow_mop); \
2009 DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
2010 DO_PREWIDEN(VADDL_U, u, add, false, 0)
2011 DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
2012 DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
2013 DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
2014 DO_PREWIDEN(VADDW_U, u, add, true, 0)
2015 DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
2016 DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
2018 static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
2019 NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
2021 /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
2022 TCGv_i64 rn_64, rm_64;
2023 TCGv_i32 rd0, rd1;
2025 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2026 return false;
2029 /* UNDEF accesses to D16-D31 if they don't exist. */
2030 if (!dc_isar_feature(aa32_simd_r32, s) &&
2031 ((a->vd | a->vn | a->vm) & 0x10)) {
2032 return false;
2035 if (!opfn || !narrowfn) {
2036 /* size == 3 case, which is an entirely different insn group */
2037 return false;
2040 if ((a->vn | a->vm) & 1) {
2041 return false;
2044 if (!vfp_access_check(s)) {
2045 return true;
2048 rn_64 = tcg_temp_new_i64();
2049 rm_64 = tcg_temp_new_i64();
2050 rd0 = tcg_temp_new_i32();
2051 rd1 = tcg_temp_new_i32();
2053 read_neon_element64(rn_64, a->vn, 0, MO_64);
2054 read_neon_element64(rm_64, a->vm, 0, MO_64);
2056 opfn(rn_64, rn_64, rm_64);
2058 narrowfn(rd0, rn_64);
2060 read_neon_element64(rn_64, a->vn, 1, MO_64);
2061 read_neon_element64(rm_64, a->vm, 1, MO_64);
2063 opfn(rn_64, rn_64, rm_64);
2065 narrowfn(rd1, rn_64);
2067 write_neon_element32(rd0, a->vd, 0, MO_32);
2068 write_neon_element32(rd1, a->vd, 1, MO_32);
2070 tcg_temp_free_i32(rd0);
2071 tcg_temp_free_i32(rd1);
2072 tcg_temp_free_i64(rn_64);
2073 tcg_temp_free_i64(rm_64);
2075 return true;
2078 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
2079 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
2081 static NeonGenTwo64OpFn * const addfn[] = { \
2082 gen_helper_neon_##OP##l_u16, \
2083 gen_helper_neon_##OP##l_u32, \
2084 tcg_gen_##OP##_i64, \
2085 NULL, \
2086 }; \
2087 static NeonGenNarrowFn * const narrowfn[] = { \
2088 gen_helper_neon_##NARROWTYPE##_high_u8, \
2089 gen_helper_neon_##NARROWTYPE##_high_u16, \
2090 EXTOP, \
2091 NULL, \
2092 }; \
2093 return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
2096 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
2098 tcg_gen_addi_i64(rn, rn, 1u << 31);
2099 tcg_gen_extrh_i64_i32(rd, rn);
2102 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
2103 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
2104 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
2105 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
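/*
 * Editorial note: VADDHN keeps the high half of each double-width sum
 * (tcg_gen_extrh_i64_i32 in the 64-bit case), while VRADDHN first adds
 * a rounding constant, e.g. 1 << 31 in gen_narrow_round_high_u32.
 */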
2107 static bool do_long_3d(DisasContext *s, arg_3diff *a,
2108 NeonGenTwoOpWidenFn *opfn,
2109 NeonGenTwo64OpFn *accfn)
2111 /*
2112 * 3-regs different lengths, long operations.
2113 * These perform an operation on two inputs that returns a double-width
2114 * result, and then possibly perform an accumulation operation of
2115 * that result into the double-width destination.
2116 */
2117 TCGv_i64 rd0, rd1, tmp;
2118 TCGv_i32 rn, rm;
2120 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2121 return false;
2124 /* UNDEF accesses to D16-D31 if they don't exist. */
2125 if (!dc_isar_feature(aa32_simd_r32, s) &&
2126 ((a->vd | a->vn | a->vm) & 0x10)) {
2127 return false;
2130 if (!opfn) {
2131 /* size == 3 case, which is an entirely different insn group */
2132 return false;
2135 if (a->vd & 1) {
2136 return false;
2139 if (!vfp_access_check(s)) {
2140 return true;
2143 rd0 = tcg_temp_new_i64();
2144 rd1 = tcg_temp_new_i64();
2146 rn = tcg_temp_new_i32();
2147 rm = tcg_temp_new_i32();
2148 read_neon_element32(rn, a->vn, 0, MO_32);
2149 read_neon_element32(rm, a->vm, 0, MO_32);
2150 opfn(rd0, rn, rm);
2152 read_neon_element32(rn, a->vn, 1, MO_32);
2153 read_neon_element32(rm, a->vm, 1, MO_32);
2154 opfn(rd1, rn, rm);
2155 tcg_temp_free_i32(rn);
2156 tcg_temp_free_i32(rm);
2158 /* Don't store results until after all loads: they might overlap */
2159 if (accfn) {
2160 tmp = tcg_temp_new_i64();
2161 read_neon_element64(tmp, a->vd, 0, MO_64);
2162 accfn(rd0, tmp, rd0);
2163 read_neon_element64(tmp, a->vd, 1, MO_64);
2164 accfn(rd1, tmp, rd1);
2165 tcg_temp_free_i64(tmp);
2168 write_neon_element64(rd0, a->vd, 0, MO_64);
2169 write_neon_element64(rd1, a->vd, 1, MO_64);
2170 tcg_temp_free_i64(rd0);
2171 tcg_temp_free_i64(rd1);
2173 return true;
2176 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
2178 static NeonGenTwoOpWidenFn * const opfn[] = {
2179 gen_helper_neon_abdl_s16,
2180 gen_helper_neon_abdl_s32,
2181 gen_helper_neon_abdl_s64,
2182 NULL,
2185 return do_long_3d(s, a, opfn[a->size], NULL);
2188 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
2190 static NeonGenTwoOpWidenFn * const opfn[] = {
2191 gen_helper_neon_abdl_u16,
2192 gen_helper_neon_abdl_u32,
2193 gen_helper_neon_abdl_u64,
2194 NULL,
2197 return do_long_3d(s, a, opfn[a->size], NULL);
2200 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
2202 static NeonGenTwoOpWidenFn * const opfn[] = {
2203 gen_helper_neon_abdl_s16,
2204 gen_helper_neon_abdl_s32,
2205 gen_helper_neon_abdl_s64,
2206 NULL,
2208 static NeonGenTwo64OpFn * const addfn[] = {
2209 gen_helper_neon_addl_u16,
2210 gen_helper_neon_addl_u32,
2211 tcg_gen_add_i64,
2212 NULL,
2215 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2218 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
2220 static NeonGenTwoOpWidenFn * const opfn[] = {
2221 gen_helper_neon_abdl_u16,
2222 gen_helper_neon_abdl_u32,
2223 gen_helper_neon_abdl_u64,
2224 NULL,
2226 static NeonGenTwo64OpFn * const addfn[] = {
2227 gen_helper_neon_addl_u16,
2228 gen_helper_neon_addl_u32,
2229 tcg_gen_add_i64,
2230 NULL,
2233 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2236 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2238 TCGv_i32 lo = tcg_temp_new_i32();
2239 TCGv_i32 hi = tcg_temp_new_i32();
2241 tcg_gen_muls2_i32(lo, hi, rn, rm);
2242 tcg_gen_concat_i32_i64(rd, lo, hi);
2244 tcg_temp_free_i32(lo);
2245 tcg_temp_free_i32(hi);
2248 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2250 TCGv_i32 lo = tcg_temp_new_i32();
2251 TCGv_i32 hi = tcg_temp_new_i32();
2253 tcg_gen_mulu2_i32(lo, hi, rn, rm);
2254 tcg_gen_concat_i32_i64(rd, lo, hi);
2256 tcg_temp_free_i32(lo);
2257 tcg_temp_free_i32(hi);
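/*
 * tcg_gen_muls2_i32/mulu2_i32 produce the low and high 32 bits of the
 * product separately; concatenating them gives the full 64-bit result,
 * e.g. 0xffffffff * 0xffffffff = 0xfffffffe_00000001 in the unsigned
 * case.
 */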
2260 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
2262 static NeonGenTwoOpWidenFn * const opfn[] = {
2263 gen_helper_neon_mull_s8,
2264 gen_helper_neon_mull_s16,
2265 gen_mull_s32,
2266 NULL,
2269 return do_long_3d(s, a, opfn[a->size], NULL);
2272 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2274 static NeonGenTwoOpWidenFn * const opfn[] = {
2275 gen_helper_neon_mull_u8,
2276 gen_helper_neon_mull_u16,
2277 gen_mull_u32,
2278 NULL,
2281 return do_long_3d(s, a, opfn[a->size], NULL);
2284 #define DO_VMLAL(INSN,MULL,ACC) \
2285 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
2287 static NeonGenTwoOpWidenFn * const opfn[] = { \
2288 gen_helper_neon_##MULL##8, \
2289 gen_helper_neon_##MULL##16, \
2290 gen_##MULL##32, \
2291 NULL, \
2292 }; \
2293 static NeonGenTwo64OpFn * const accfn[] = { \
2294 gen_helper_neon_##ACC##l_u16, \
2295 gen_helper_neon_##ACC##l_u32, \
2296 tcg_gen_##ACC##_i64, \
2297 NULL, \
2298 }; \
2299 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
2302 DO_VMLAL(VMLAL_S,mull_s,add)
2303 DO_VMLAL(VMLAL_U,mull_u,add)
2304 DO_VMLAL(VMLSL_S,mull_s,sub)
2305 DO_VMLAL(VMLSL_U,mull_u,sub)
2307 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2309 gen_helper_neon_mull_s16(rd, rn, rm);
2310 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
2313 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2315 gen_mull_s32(rd, rn, rm);
2316 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
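/*
 * The doubling in VQDMULL is done as a saturating add of the product to
 * itself, which handles the single overflowing case for free: in the
 * 16-bit form, (-32768 * -32768) * 2 would be 0x80000000, which
 * saturates to 0x7fffffff and sets QC.
 */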
2319 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
2321 static NeonGenTwoOpWidenFn * const opfn[] = {
2322 NULL,
2323 gen_VQDMULL_16,
2324 gen_VQDMULL_32,
2325 NULL,
2328 return do_long_3d(s, a, opfn[a->size], NULL);
2331 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2333 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2336 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2338 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2341 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2343 static NeonGenTwoOpWidenFn * const opfn[] = {
2344 NULL,
2345 gen_VQDMULL_16,
2346 gen_VQDMULL_32,
2347 NULL,
2349 static NeonGenTwo64OpFn * const accfn[] = {
2350 NULL,
2351 gen_VQDMLAL_acc_16,
2352 gen_VQDMLAL_acc_32,
2353 NULL,
2356 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2359 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2361 gen_helper_neon_negl_u32(rm, rm);
2362 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2365 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2367 tcg_gen_neg_i64(rm, rm);
2368 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2371 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2373 static NeonGenTwoOpWidenFn * const opfn[] = {
2374 NULL,
2375 gen_VQDMULL_16,
2376 gen_VQDMULL_32,
2377 NULL,
2379 static NeonGenTwo64OpFn * const accfn[] = {
2380 NULL,
2381 gen_VQDMLSL_acc_16,
2382 gen_VQDMLSL_acc_32,
2383 NULL,
2386 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2389 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
2391 gen_helper_gvec_3 *fn_gvec;
2393 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2394 return false;
2397 /* UNDEF accesses to D16-D31 if they don't exist. */
2398 if (!dc_isar_feature(aa32_simd_r32, s) &&
2399 ((a->vd | a->vn | a->vm) & 0x10)) {
2400 return false;
2403 if (a->vd & 1) {
2404 return false;
2407 switch (a->size) {
2408 case 0:
2409 fn_gvec = gen_helper_neon_pmull_h;
2410 break;
2411 case 2:
2412 if (!dc_isar_feature(aa32_pmull, s)) {
2413 return false;
2415 fn_gvec = gen_helper_gvec_pmull_q;
2416 break;
2417 default:
2418 return false;
2421 if (!vfp_access_check(s)) {
2422 return true;
2425 tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
2426 neon_full_reg_offset(a->vn),
2427 neon_full_reg_offset(a->vm),
2428 16, 16, 0, fn_gvec);
2429 return true;
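/*
 * Polynomial VMULL: size 0 is the baseline 8x8->16 VMULL.P8, while
 * size 2 is the 64x64->128 VMULL.P64, gated on the separate aa32_pmull
 * feature bit from the crypto extensions.
 */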
2432 static void gen_neon_dup_low16(TCGv_i32 var)
2434 TCGv_i32 tmp = tcg_temp_new_i32();
2435 tcg_gen_ext16u_i32(var, var);
2436 tcg_gen_shli_i32(tmp, var, 16);
2437 tcg_gen_or_i32(var, var, tmp);
2438 tcg_temp_free_i32(tmp);
2441 static void gen_neon_dup_high16(TCGv_i32 var)
2443 TCGv_i32 tmp = tcg_temp_new_i32();
2444 tcg_gen_andi_i32(var, var, 0xffff0000);
2445 tcg_gen_shri_i32(tmp, var, 16);
2446 tcg_gen_or_i32(var, var, tmp);
2447 tcg_temp_free_i32(tmp);
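/*
 * For illustration: with var = 0xaabbccdd, gen_neon_dup_low16() leaves
 * 0xccddccdd and gen_neon_dup_high16() leaves 0xaabbaabb, i.e. one
 * 16-bit scalar replicated into both halves of the 32-bit value.
 */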
2450 static inline TCGv_i32 neon_get_scalar(int size, int reg)
2452 TCGv_i32 tmp = tcg_temp_new_i32();
2453 if (size == MO_16) {
2454 read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
2455 if (reg & 8) {
2456 gen_neon_dup_high16(tmp);
2457 } else {
2458 gen_neon_dup_low16(tmp);
2460 } else {
2461 read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
2463 return tmp;
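/*
 * Note the scalar encoding: for MO_16 the register is Dm<2:0> and the
 * lane number is bits [4:3] of 'reg', decomposed above as bit 4 picking
 * the 32-bit element and bit 3 picking its high or low half. For MO_32
 * the register is reg<3:0> and the lane is just bit 4.
 */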
2466 static bool do_2scalar(DisasContext *s, arg_2scalar *a,
2467 NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
2469 /*
2470 * Two registers and a scalar: perform an operation between
2471 * the input elements and the scalar, and then possibly
2472 * perform an accumulation operation of that result into the
2473 * destination.
2474 */
2475 TCGv_i32 scalar, tmp;
2476 int pass;
2478 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2479 return false;
2482 /* UNDEF accesses to D16-D31 if they don't exist. */
2483 if (!dc_isar_feature(aa32_simd_r32, s) &&
2484 ((a->vd | a->vn | a->vm) & 0x10)) {
2485 return false;
2488 if (!opfn) {
2489 /* Bad size (including size == 3, which is a different insn group) */
2490 return false;
2493 if (a->q && ((a->vd | a->vn) & 1)) {
2494 return false;
2497 if (!vfp_access_check(s)) {
2498 return true;
2501 scalar = neon_get_scalar(a->size, a->vm);
2502 tmp = tcg_temp_new_i32();
2504 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2505 read_neon_element32(tmp, a->vn, pass, MO_32);
2506 opfn(tmp, tmp, scalar);
2507 if (accfn) {
2508 TCGv_i32 rd = tcg_temp_new_i32();
2509 read_neon_element32(rd, a->vd, pass, MO_32);
2510 accfn(tmp, rd, tmp);
2511 tcg_temp_free_i32(rd);
2513 write_neon_element32(tmp, a->vd, pass, MO_32);
2515 tcg_temp_free_i32(tmp);
2516 tcg_temp_free_i32(scalar);
2517 return true;
2520 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2522 static NeonGenTwoOpFn * const opfn[] = {
2523 NULL,
2524 gen_helper_neon_mul_u16,
2525 tcg_gen_mul_i32,
2526 NULL,
2529 return do_2scalar(s, a, opfn[a->size], NULL);
2532 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2534 static NeonGenTwoOpFn * const opfn[] = {
2535 NULL,
2536 gen_helper_neon_mul_u16,
2537 tcg_gen_mul_i32,
2538 NULL,
2540 static NeonGenTwoOpFn * const accfn[] = {
2541 NULL,
2542 gen_helper_neon_add_u16,
2543 tcg_gen_add_i32,
2544 NULL,
2547 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2550 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2552 static NeonGenTwoOpFn * const opfn[] = {
2553 NULL,
2554 gen_helper_neon_mul_u16,
2555 tcg_gen_mul_i32,
2556 NULL,
2558 static NeonGenTwoOpFn * const accfn[] = {
2559 NULL,
2560 gen_helper_neon_sub_u16,
2561 tcg_gen_sub_i32,
2562 NULL,
2565 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2568 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
2569 gen_helper_gvec_3_ptr *fn)
2571 /* Two registers and a scalar, using gvec */
2572 int vec_size = a->q ? 16 : 8;
2573 int rd_ofs = neon_full_reg_offset(a->vd);
2574 int rn_ofs = neon_full_reg_offset(a->vn);
2575 int rm_ofs;
2576 int idx;
2577 TCGv_ptr fpstatus;
2579 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2580 return false;
2583 /* UNDEF accesses to D16-D31 if they don't exist. */
2584 if (!dc_isar_feature(aa32_simd_r32, s) &&
2585 ((a->vd | a->vn | a->vm) & 0x10)) {
2586 return false;
2589 if (!fn) {
2590 /* Bad size (including size == 3, which is a different insn group) */
2591 return false;
2594 if (a->q && ((a->vd | a->vn) & 1)) {
2595 return false;
2598 if (!vfp_access_check(s)) {
2599 return true;
2602 /* a->vm is M:Vm, which encodes both register and index */
2603 idx = extract32(a->vm, a->size + 2, 2);
2604 a->vm = extract32(a->vm, 0, a->size + 2);
2605 rm_ofs = neon_full_reg_offset(a->vm);
2607 fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
2608 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
2609 vec_size, vec_size, idx, fn);
2610 tcg_temp_free_ptr(fpstatus);
2611 return true;
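/*
 * Worked example: with a->size == MO_16 and a->vm == 0b10110,
 * idx = extract32(0b10110, 3, 2) = 2 and a->vm becomes 0b110, so the
 * scalar is the third 16-bit lane of D6.
 */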
2614 #define DO_VMUL_F_2sc(NAME, FUNC) \
2615 static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
2617 static gen_helper_gvec_3_ptr * const opfn[] = { \
2618 NULL, \
2619 gen_helper_##FUNC##_h, \
2620 gen_helper_##FUNC##_s, \
2621 NULL, \
2622 }; \
2623 if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
2624 return false; \
2626 return do_2scalar_fp_vec(s, a, opfn[a->size]); \
2629 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
2630 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
2631 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
2633 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
2634 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
2635 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
2636 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
2638 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
2640 static NeonGenTwoOpFn * const opfn[] = {
2641 NULL,
2642 gen_VQDMULH_16,
2643 gen_VQDMULH_32,
2644 NULL,
2647 return do_2scalar(s, a, opfn[a->size], NULL);
2650 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
2652 static NeonGenTwoOpFn * const opfn[] = {
2653 NULL,
2654 gen_VQRDMULH_16,
2655 gen_VQRDMULH_32,
2656 NULL,
2659 return do_2scalar(s, a, opfn[a->size], NULL);
2662 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
2663 NeonGenThreeOpEnvFn *opfn)
2665 /*
2666 * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
2667 * performs a kind of fused op-then-accumulate using a helper
2668 * function that takes all of rd, rn and the scalar at once.
2669 */
2670 TCGv_i32 scalar, rn, rd;
2671 int pass;
2673 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2674 return false;
2677 if (!dc_isar_feature(aa32_rdm, s)) {
2678 return false;
2681 /* UNDEF accesses to D16-D31 if they don't exist. */
2682 if (!dc_isar_feature(aa32_simd_r32, s) &&
2683 ((a->vd | a->vn | a->vm) & 0x10)) {
2684 return false;
2687 if (!opfn) {
2688 /* Bad size (including size == 3, which is a different insn group) */
2689 return false;
2692 if (a->q && ((a->vd | a->vn) & 1)) {
2693 return false;
2696 if (!vfp_access_check(s)) {
2697 return true;
2700 scalar = neon_get_scalar(a->size, a->vm);
2701 rn = tcg_temp_new_i32();
2702 rd = tcg_temp_new_i32();
2704 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2705 read_neon_element32(rn, a->vn, pass, MO_32);
2706 read_neon_element32(rd, a->vd, pass, MO_32);
2707 opfn(rd, cpu_env, rn, scalar, rd);
2708 write_neon_element32(rd, a->vd, pass, MO_32);
2710 tcg_temp_free_i32(rn);
2711 tcg_temp_free_i32(rd);
2712 tcg_temp_free_i32(scalar);
2714 return true;
2717 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
2719 static NeonGenThreeOpEnvFn *opfn[] = {
2720 NULL,
2721 gen_helper_neon_qrdmlah_s16,
2722 gen_helper_neon_qrdmlah_s32,
2723 NULL,
2725 return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2728 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
2730 static NeonGenThreeOpEnvFn *opfn[] = {
2731 NULL,
2732 gen_helper_neon_qrdmlsh_s16,
2733 gen_helper_neon_qrdmlsh_s32,
2734 NULL,
2736 return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2739 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
2740 NeonGenTwoOpWidenFn *opfn,
2741 NeonGenTwo64OpFn *accfn)
2743 /*
2744 * Two registers and a scalar, long operations: perform an
2745 * operation on the input elements and the scalar which produces
2746 * a double-width result, and then possibly perform an accumulation
2747 * operation of that result into the destination.
2748 */
2749 TCGv_i32 scalar, rn;
2750 TCGv_i64 rn0_64, rn1_64;
2752 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2753 return false;
2756 /* UNDEF accesses to D16-D31 if they don't exist. */
2757 if (!dc_isar_feature(aa32_simd_r32, s) &&
2758 ((a->vd | a->vn | a->vm) & 0x10)) {
2759 return false;
2762 if (!opfn) {
2763 /* Bad size (including size == 3, which is a different insn group) */
2764 return false;
2767 if (a->vd & 1) {
2768 return false;
2771 if (!vfp_access_check(s)) {
2772 return true;
2775 scalar = neon_get_scalar(a->size, a->vm);
2777 /* Load all inputs before writing any outputs, in case of overlap */
2778 rn = tcg_temp_new_i32();
2779 read_neon_element32(rn, a->vn, 0, MO_32);
2780 rn0_64 = tcg_temp_new_i64();
2781 opfn(rn0_64, rn, scalar);
2783 read_neon_element32(rn, a->vn, 1, MO_32);
2784 rn1_64 = tcg_temp_new_i64();
2785 opfn(rn1_64, rn, scalar);
2786 tcg_temp_free_i32(rn);
2787 tcg_temp_free_i32(scalar);
2789 if (accfn) {
2790 TCGv_i64 t64 = tcg_temp_new_i64();
2791 read_neon_element64(t64, a->vd, 0, MO_64);
2792 accfn(rn0_64, t64, rn0_64);
2793 read_neon_element64(t64, a->vd, 1, MO_64);
2794 accfn(rn1_64, t64, rn1_64);
2795 tcg_temp_free_i64(t64);
2798 write_neon_element64(rn0_64, a->vd, 0, MO_64);
2799 write_neon_element64(rn1_64, a->vd, 1, MO_64);
2800 tcg_temp_free_i64(rn0_64);
2801 tcg_temp_free_i64(rn1_64);
2802 return true;
2805 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
2807 static NeonGenTwoOpWidenFn * const opfn[] = {
2808 NULL,
2809 gen_helper_neon_mull_s16,
2810 gen_mull_s32,
2811 NULL,
2814 return do_2scalar_long(s, a, opfn[a->size], NULL);
2817 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
2819 static NeonGenTwoOpWidenFn * const opfn[] = {
2820 NULL,
2821 gen_helper_neon_mull_u16,
2822 gen_mull_u32,
2823 NULL,
2826 return do_2scalar_long(s, a, opfn[a->size], NULL);
2829 #define DO_VMLAL_2SC(INSN, MULL, ACC) \
2830 static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
2832 static NeonGenTwoOpWidenFn * const opfn[] = { \
2833 NULL, \
2834 gen_helper_neon_##MULL##16, \
2835 gen_##MULL##32, \
2836 NULL, \
2837 }; \
2838 static NeonGenTwo64OpFn * const accfn[] = { \
2839 NULL, \
2840 gen_helper_neon_##ACC##l_u32, \
2841 tcg_gen_##ACC##_i64, \
2842 NULL, \
2843 }; \
2844 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
2847 DO_VMLAL_2SC(VMLAL_S, mull_s, add)
2848 DO_VMLAL_2SC(VMLAL_U, mull_u, add)
2849 DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
2850 DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
2852 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
2854 static NeonGenTwoOpWidenFn * const opfn[] = {
2855 NULL,
2856 gen_VQDMULL_16,
2857 gen_VQDMULL_32,
2858 NULL,
2861 return do_2scalar_long(s, a, opfn[a->size], NULL);
2864 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
2866 static NeonGenTwoOpWidenFn * const opfn[] = {
2867 NULL,
2868 gen_VQDMULL_16,
2869 gen_VQDMULL_32,
2870 NULL,
2872 static NeonGenTwo64OpFn * const accfn[] = {
2873 NULL,
2874 gen_VQDMLAL_acc_16,
2875 gen_VQDMLAL_acc_32,
2876 NULL,
2879 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2882 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
2884 static NeonGenTwoOpWidenFn * const opfn[] = {
2885 NULL,
2886 gen_VQDMULL_16,
2887 gen_VQDMULL_32,
2888 NULL,
2890 static NeonGenTwo64OpFn * const accfn[] = {
2891 NULL,
2892 gen_VQDMLSL_acc_16,
2893 gen_VQDMLSL_acc_32,
2894 NULL,
2897 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2900 static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
2902 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2903 return false;
2906 /* UNDEF accesses to D16-D31 if they don't exist. */
2907 if (!dc_isar_feature(aa32_simd_r32, s) &&
2908 ((a->vd | a->vn | a->vm) & 0x10)) {
2909 return false;
2912 if ((a->vn | a->vm | a->vd) & a->q) {
2913 return false;
2916 if (a->imm > 7 && !a->q) {
2917 return false;
2920 if (!vfp_access_check(s)) {
2921 return true;
2924 if (!a->q) {
2925 /* Extract 64 bits from <Vm:Vn> */
2926 TCGv_i64 left, right, dest;
2928 left = tcg_temp_new_i64();
2929 right = tcg_temp_new_i64();
2930 dest = tcg_temp_new_i64();
2932 read_neon_element64(right, a->vn, 0, MO_64);
2933 read_neon_element64(left, a->vm, 0, MO_64);
2934 tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
2935 write_neon_element64(dest, a->vd, 0, MO_64);
2937 tcg_temp_free_i64(left);
2938 tcg_temp_free_i64(right);
2939 tcg_temp_free_i64(dest);
2940 } else {
2941 /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
2942 TCGv_i64 left, middle, right, destleft, destright;
2944 left = tcg_temp_new_i64();
2945 middle = tcg_temp_new_i64();
2946 right = tcg_temp_new_i64();
2947 destleft = tcg_temp_new_i64();
2948 destright = tcg_temp_new_i64();
2950 if (a->imm < 8) {
2951 read_neon_element64(right, a->vn, 0, MO_64);
2952 read_neon_element64(middle, a->vn, 1, MO_64);
2953 tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
2954 read_neon_element64(left, a->vm, 0, MO_64);
2955 tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
2956 } else {
2957 read_neon_element64(right, a->vn, 1, MO_64);
2958 read_neon_element64(middle, a->vm, 0, MO_64);
2959 tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
2960 read_neon_element64(left, a->vm, 1, MO_64);
2961 tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
2964 write_neon_element64(destright, a->vd, 0, MO_64);
2965 write_neon_element64(destleft, a->vd, 1, MO_64);
2967 tcg_temp_free_i64(destright);
2968 tcg_temp_free_i64(destleft);
2969 tcg_temp_free_i64(right);
2970 tcg_temp_free_i64(middle);
2971 tcg_temp_free_i64(left);
2973 return true;
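/*
 * For the 64-bit case above: tcg_gen_extract2_i64(dest, right, left,
 * imm * 8) computes (right >> (imm * 8)) | (left << (64 - imm * 8)), so
 * for VEXT.8 with imm == 3 the result is bytes 3..7 of Vn followed by
 * bytes 0..2 of Vm, matching the architectural byte extraction from
 * <Vm:Vn>.
 */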
2976 static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
2978 TCGv_i64 val, def;
2979 TCGv_i32 desc;
2981 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2982 return false;
2985 /* UNDEF accesses to D16-D31 if they don't exist. */
2986 if (!dc_isar_feature(aa32_simd_r32, s) &&
2987 ((a->vd | a->vn | a->vm) & 0x10)) {
2988 return false;
2991 if ((a->vn + a->len + 1) > 32) {
2992 /*
2993 * This is UNPREDICTABLE; we choose to UNDEF to avoid the
2994 * helper function running off the end of the register file.
2995 */
2996 return false;
2999 if (!vfp_access_check(s)) {
3000 return true;
3003 desc = tcg_const_i32((a->vn << 2) | a->len);
3004 def = tcg_temp_new_i64();
3005 if (a->op) {
3006 read_neon_element64(def, a->vd, 0, MO_64);
3007 } else {
3008 tcg_gen_movi_i64(def, 0);
3010 val = tcg_temp_new_i64();
3011 read_neon_element64(val, a->vm, 0, MO_64);
3013 gen_helper_neon_tbl(val, cpu_env, desc, val, def);
3014 write_neon_element64(val, a->vd, 0, MO_64);
3016 tcg_temp_free_i64(def);
3017 tcg_temp_free_i64(val);
3018 tcg_temp_free_i32(desc);
3019 return true;
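/*
 * The desc word packs the table base register and (number of table
 * registers - 1) for the helper; a->op distinguishes VTBX, where
 * out-of-range indices keep the old destination bytes loaded into 'def'
 * above, from VTBL, where they produce zero.
 */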
3022 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
3024 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3025 return false;
3028 /* UNDEF accesses to D16-D31 if they don't exist. */
3029 if (!dc_isar_feature(aa32_simd_r32, s) &&
3030 ((a->vd | a->vm) & 0x10)) {
3031 return false;
3034 if (a->vd & a->q) {
3035 return false;
3038 if (!vfp_access_check(s)) {
3039 return true;
3042 tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
3043 neon_element_offset(a->vm, a->index, a->size),
3044 a->q ? 16 : 8, a->q ? 16 : 8);
3045 return true;
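/*
 * tcg_gen_gvec_dup_mem() replicates the selected element straight from
 * its offset in CPUState across the destination vector, so no TCG
 * temporary is needed for VDUP (scalar).
 */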
3048 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
3050 int pass, half;
3051 TCGv_i32 tmp[2];
3053 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3054 return false;
3057 /* UNDEF accesses to D16-D31 if they don't exist. */
3058 if (!dc_isar_feature(aa32_simd_r32, s) &&
3059 ((a->vd | a->vm) & 0x10)) {
3060 return false;
3063 if ((a->vd | a->vm) & a->q) {
3064 return false;
3067 if (a->size == 3) {
3068 return false;
3071 if (!vfp_access_check(s)) {
3072 return true;
3075 tmp[0] = tcg_temp_new_i32();
3076 tmp[1] = tcg_temp_new_i32();
3078 for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3079 for (half = 0; half < 2; half++) {
3080 read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
3081 switch (a->size) {
3082 case 0:
3083 tcg_gen_bswap32_i32(tmp[half], tmp[half]);
3084 break;
3085 case 1:
3086 gen_swap_half(tmp[half], tmp[half]);
3087 break;
3088 case 2:
3089 break;
3090 default:
3091 g_assert_not_reached();
3094 write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
3095 write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
3098 tcg_temp_free_i32(tmp[0]);
3099 tcg_temp_free_i32(tmp[1]);
3100 return true;
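/*
 * VREV64 is implemented as a swap of the two 32-bit halves of each
 * 64-bit element (tmp[1] is written back to the even slot, tmp[0] to the
 * odd one), plus a byte or halfword swap inside each 32-bit word for the
 * smaller element sizes; size 2 needs no inner swap.
 */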
3103 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
3104 NeonGenWidenFn *widenfn,
3105 NeonGenTwo64OpFn *opfn,
3106 NeonGenTwo64OpFn *accfn)
3108 /*
3109 * Pairwise long operations: widen both halves of the pair,
3110 * combine the pairs with the opfn, and then possibly accumulate
3111 * into the destination with the accfn.
3112 */
3113 int pass;
3115 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3116 return false;
3119 /* UNDEF accesses to D16-D31 if they don't exist. */
3120 if (!dc_isar_feature(aa32_simd_r32, s) &&
3121 ((a->vd | a->vm) & 0x10)) {
3122 return false;
3125 if ((a->vd | a->vm) & a->q) {
3126 return false;
3129 if (!widenfn) {
3130 return false;
3133 if (!vfp_access_check(s)) {
3134 return true;
3137 for (pass = 0; pass < a->q + 1; pass++) {
3138 TCGv_i32 tmp;
3139 TCGv_i64 rm0_64, rm1_64, rd_64;
3141 rm0_64 = tcg_temp_new_i64();
3142 rm1_64 = tcg_temp_new_i64();
3143 rd_64 = tcg_temp_new_i64();
3145 tmp = tcg_temp_new_i32();
3146 read_neon_element32(tmp, a->vm, pass * 2, MO_32);
3147 widenfn(rm0_64, tmp);
3148 read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
3149 widenfn(rm1_64, tmp);
3150 tcg_temp_free_i32(tmp);
3152 opfn(rd_64, rm0_64, rm1_64);
3153 tcg_temp_free_i64(rm0_64);
3154 tcg_temp_free_i64(rm1_64);
3156 if (accfn) {
3157 TCGv_i64 tmp64 = tcg_temp_new_i64();
3158 read_neon_element64(tmp64, a->vd, pass, MO_64);
3159 accfn(rd_64, tmp64, rd_64);
3160 tcg_temp_free_i64(tmp64);
3162 write_neon_element64(rd_64, a->vd, pass, MO_64);
3163 tcg_temp_free_i64(rd_64);
3165 return true;
3168 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
3170 static NeonGenWidenFn * const widenfn[] = {
3171 gen_helper_neon_widen_s8,
3172 gen_helper_neon_widen_s16,
3173 tcg_gen_ext_i32_i64,
3174 NULL,
3176 static NeonGenTwo64OpFn * const opfn[] = {
3177 gen_helper_neon_paddl_u16,
3178 gen_helper_neon_paddl_u32,
3179 tcg_gen_add_i64,
3180 NULL,
3183 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
3186 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
3188 static NeonGenWidenFn * const widenfn[] = {
3189 gen_helper_neon_widen_u8,
3190 gen_helper_neon_widen_u16,
3191 tcg_gen_extu_i32_i64,
3192 NULL,
3194 static NeonGenTwo64OpFn * const opfn[] = {
3195 gen_helper_neon_paddl_u16,
3196 gen_helper_neon_paddl_u32,
3197 tcg_gen_add_i64,
3198 NULL,
3201 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
3204 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
3206 static NeonGenWidenFn * const widenfn[] = {
3207 gen_helper_neon_widen_s8,
3208 gen_helper_neon_widen_s16,
3209 tcg_gen_ext_i32_i64,
3210 NULL,
3212 static NeonGenTwo64OpFn * const opfn[] = {
3213 gen_helper_neon_paddl_u16,
3214 gen_helper_neon_paddl_u32,
3215 tcg_gen_add_i64,
3216 NULL,
3218 static NeonGenTwo64OpFn * const accfn[] = {
3219 gen_helper_neon_addl_u16,
3220 gen_helper_neon_addl_u32,
3221 tcg_gen_add_i64,
3222 NULL,
3225 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3226 accfn[a->size]);
3229 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
3231 static NeonGenWidenFn * const widenfn[] = {
3232 gen_helper_neon_widen_u8,
3233 gen_helper_neon_widen_u16,
3234 tcg_gen_extu_i32_i64,
3235 NULL,
3237 static NeonGenTwo64OpFn * const opfn[] = {
3238 gen_helper_neon_paddl_u16,
3239 gen_helper_neon_paddl_u32,
3240 tcg_gen_add_i64,
3241 NULL,
3243 static NeonGenTwo64OpFn * const accfn[] = {
3244 gen_helper_neon_addl_u16,
3245 gen_helper_neon_addl_u32,
3246 tcg_gen_add_i64,
3247 NULL,
3250 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3251 accfn[a->size]);
3254 typedef void ZipFn(TCGv_ptr, TCGv_ptr);
3256 static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
3257 ZipFn *fn)
3259 TCGv_ptr pd, pm;
3261 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3262 return false;
3265 /* UNDEF accesses to D16-D31 if they don't exist. */
3266 if (!dc_isar_feature(aa32_simd_r32, s) &&
3267 ((a->vd | a->vm) & 0x10)) {
3268 return false;
3271 if ((a->vd | a->vm) & a->q) {
3272 return false;
3275 if (!fn) {
3276 /* Bad size or size/q combination */
3277 return false;
3280 if (!vfp_access_check(s)) {
3281 return true;
3284 pd = vfp_reg_ptr(true, a->vd);
3285 pm = vfp_reg_ptr(true, a->vm);
3286 fn(pd, pm);
3287 tcg_temp_free_ptr(pd);
3288 tcg_temp_free_ptr(pm);
3289 return true;
3292 static bool trans_VUZP(DisasContext *s, arg_2misc *a)
3294 static ZipFn * const fn[2][4] = {
3296 gen_helper_neon_unzip8,
3297 gen_helper_neon_unzip16,
3298 NULL,
3299 NULL,
3300 }, {
3301 gen_helper_neon_qunzip8,
3302 gen_helper_neon_qunzip16,
3303 gen_helper_neon_qunzip32,
3304 NULL,
3307 return do_zip_uzp(s, a, fn[a->q][a->size]);
3310 static bool trans_VZIP(DisasContext *s, arg_2misc *a)
3312 static ZipFn * const fn[2][4] = {
3314 gen_helper_neon_zip8,
3315 gen_helper_neon_zip16,
3316 NULL,
3317 NULL,
3318 }, {
3319 gen_helper_neon_qzip8,
3320 gen_helper_neon_qzip16,
3321 gen_helper_neon_qzip32,
3322 NULL,
3325 return do_zip_uzp(s, a, fn[a->q][a->size]);
3328 static bool do_vmovn(DisasContext *s, arg_2misc *a,
3329 NeonGenNarrowEnvFn *narrowfn)
3331 TCGv_i64 rm;
3332 TCGv_i32 rd0, rd1;
3334 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3335 return false;
3338 /* UNDEF accesses to D16-D31 if they don't exist. */
3339 if (!dc_isar_feature(aa32_simd_r32, s) &&
3340 ((a->vd | a->vm) & 0x10)) {
3341 return false;
3344 if (a->vm & 1) {
3345 return false;
3348 if (!narrowfn) {
3349 return false;
3352 if (!vfp_access_check(s)) {
3353 return true;
3356 rm = tcg_temp_new_i64();
3357 rd0 = tcg_temp_new_i32();
3358 rd1 = tcg_temp_new_i32();
3360 read_neon_element64(rm, a->vm, 0, MO_64);
3361 narrowfn(rd0, cpu_env, rm);
3362 read_neon_element64(rm, a->vm, 1, MO_64);
3363 narrowfn(rd1, cpu_env, rm);
3364 write_neon_element32(rd0, a->vd, 0, MO_32);
3365 write_neon_element32(rd1, a->vd, 1, MO_32);
3366 tcg_temp_free_i32(rd0);
3367 tcg_temp_free_i32(rd1);
3368 tcg_temp_free_i64(rm);
3369 return true;
3372 #define DO_VMOVN(INSN, FUNC) \
3373 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3375 static NeonGenNarrowEnvFn * const narrowfn[] = { \
3376 FUNC##8, \
3377 FUNC##16, \
3378 FUNC##32, \
3379 NULL, \
3380 }; \
3381 return do_vmovn(s, a, narrowfn[a->size]); \
3384 DO_VMOVN(VMOVN, gen_neon_narrow_u)
3385 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
3386 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
3387 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
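/*
 * The plain VMOVN is a truncating narrow; it shares the
 * NeonGenNarrowEnvFn signature, but only the saturating VQMOVN/VQMOVUN
 * variants really need cpu_env, so their helpers can set QC when a
 * value does not fit. VQMOVUN ("unarrow") narrows a signed input with
 * unsigned saturation.
 */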
3389 static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
3391 TCGv_i32 rm0, rm1;
3392 TCGv_i64 rd;
3393 static NeonGenWidenFn * const widenfns[] = {
3394 gen_helper_neon_widen_u8,
3395 gen_helper_neon_widen_u16,
3396 tcg_gen_extu_i32_i64,
3397 NULL,
3399 NeonGenWidenFn *widenfn = widenfns[a->size];
3401 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3402 return false;
3405 /* UNDEF accesses to D16-D31 if they don't exist. */
3406 if (!dc_isar_feature(aa32_simd_r32, s) &&
3407 ((a->vd | a->vm) & 0x10)) {
3408 return false;
3411 if (a->vd & 1) {
3412 return false;
3415 if (!widenfn) {
3416 return false;
3419 if (!vfp_access_check(s)) {
3420 return true;
3423 rd = tcg_temp_new_i64();
3424 rm0 = tcg_temp_new_i32();
3425 rm1 = tcg_temp_new_i32();
3427 read_neon_element32(rm0, a->vm, 0, MO_32);
3428 read_neon_element32(rm1, a->vm, 1, MO_32);
3430 widenfn(rd, rm0);
3431 tcg_gen_shli_i64(rd, rd, 8 << a->size);
3432 write_neon_element64(rd, a->vd, 0, MO_64);
3433 widenfn(rd, rm1);
3434 tcg_gen_shli_i64(rd, rd, 8 << a->size);
3435 write_neon_element64(rd, a->vd, 1, MO_64);
3437 tcg_temp_free_i64(rd);
3438 tcg_temp_free_i32(rm0);
3439 tcg_temp_free_i32(rm1);
3440 return true;
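/*
 * This is the 2-reg-misc VSHLL encoding, whose shift count is always the
 * input element width (8 << a->size): widen u8->u16 and shift by 8,
 * u16->u32 by 16, u32->u64 by 32.
 */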
3443 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
3445 TCGv_ptr fpst;
3446 TCGv_i64 tmp;
3447 TCGv_i32 dst0, dst1;
3449 if (!dc_isar_feature(aa32_bf16, s)) {
3450 return false;
3453 /* UNDEF accesses to D16-D31 if they don't exist. */
3454 if (!dc_isar_feature(aa32_simd_r32, s) &&
3455 ((a->vd | a->vm) & 0x10)) {
3456 return false;
3459 if ((a->vm & 1) || (a->size != 1)) {
3460 return false;
3463 if (!vfp_access_check(s)) {
3464 return true;
3467 fpst = fpstatus_ptr(FPST_STD);
3468 tmp = tcg_temp_new_i64();
3469 dst0 = tcg_temp_new_i32();
3470 dst1 = tcg_temp_new_i32();
3472 read_neon_element64(tmp, a->vm, 0, MO_64);
3473 gen_helper_bfcvt_pair(dst0, tmp, fpst);
3475 read_neon_element64(tmp, a->vm, 1, MO_64);
3476 gen_helper_bfcvt_pair(dst1, tmp, fpst);
3478 write_neon_element32(dst0, a->vd, 0, MO_32);
3479 write_neon_element32(dst1, a->vd, 1, MO_32);
3481 tcg_temp_free_i64(tmp);
3482 tcg_temp_free_i32(dst0);
3483 tcg_temp_free_i32(dst1);
3484 tcg_temp_free_ptr(fpst);
3485 return true;
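/*
 * As its name and the 64-bit-in/32-bit-out shape suggest,
 * gen_helper_bfcvt_pair converts a pair of f32 values to two bfloat16
 * results packed into one 32-bit word, so the four f32 lanes of Qm
 * narrow into the four bf16 lanes of Dd.
 */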
3488 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
3490 TCGv_ptr fpst;
3491 TCGv_i32 ahp, tmp, tmp2, tmp3;
3493 if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3494 !dc_isar_feature(aa32_fp16_spconv, s)) {
3495 return false;
3498 /* UNDEF accesses to D16-D31 if they don't exist. */
3499 if (!dc_isar_feature(aa32_simd_r32, s) &&
3500 ((a->vd | a->vm) & 0x10)) {
3501 return false;
3504 if ((a->vm & 1) || (a->size != 1)) {
3505 return false;
3508 if (!vfp_access_check(s)) {
3509 return true;
3512 fpst = fpstatus_ptr(FPST_STD);
3513 ahp = get_ahp_flag();
3514 tmp = tcg_temp_new_i32();
3515 read_neon_element32(tmp, a->vm, 0, MO_32);
3516 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3517 tmp2 = tcg_temp_new_i32();
3518 read_neon_element32(tmp2, a->vm, 1, MO_32);
3519 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
3520 tcg_gen_shli_i32(tmp2, tmp2, 16);
3521 tcg_gen_or_i32(tmp2, tmp2, tmp);
3522 read_neon_element32(tmp, a->vm, 2, MO_32);
3523 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3524 tmp3 = tcg_temp_new_i32();
3525 read_neon_element32(tmp3, a->vm, 3, MO_32);
3526 write_neon_element32(tmp2, a->vd, 0, MO_32);
3527 tcg_temp_free_i32(tmp2);
3528 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
3529 tcg_gen_shli_i32(tmp3, tmp3, 16);
3530 tcg_gen_or_i32(tmp3, tmp3, tmp);
3531 write_neon_element32(tmp3, a->vd, 1, MO_32);
3532 tcg_temp_free_i32(tmp3);
3533 tcg_temp_free_i32(tmp);
3534 tcg_temp_free_i32(ahp);
3535 tcg_temp_free_ptr(fpst);
3537 return true;
3540 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
3542 TCGv_ptr fpst;
3543 TCGv_i32 ahp, tmp, tmp2, tmp3;
3545 if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3546 !dc_isar_feature(aa32_fp16_spconv, s)) {
3547 return false;
3550 /* UNDEF accesses to D16-D31 if they don't exist. */
3551 if (!dc_isar_feature(aa32_simd_r32, s) &&
3552 ((a->vd | a->vm) & 0x10)) {
3553 return false;
3556 if ((a->vd & 1) || (a->size != 1)) {
3557 return false;
3560 if (!vfp_access_check(s)) {
3561 return true;
3564 fpst = fpstatus_ptr(FPST_STD);
3565 ahp = get_ahp_flag();
3566 tmp3 = tcg_temp_new_i32();
3567 tmp2 = tcg_temp_new_i32();
3568 tmp = tcg_temp_new_i32();
3569 read_neon_element32(tmp, a->vm, 0, MO_32);
3570 read_neon_element32(tmp2, a->vm, 1, MO_32);
3571 tcg_gen_ext16u_i32(tmp3, tmp);
3572 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3573 write_neon_element32(tmp3, a->vd, 0, MO_32);
3574 tcg_gen_shri_i32(tmp, tmp, 16);
3575 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
3576 write_neon_element32(tmp, a->vd, 1, MO_32);
3577 tcg_temp_free_i32(tmp);
3578 tcg_gen_ext16u_i32(tmp3, tmp2);
3579 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3580 write_neon_element32(tmp3, a->vd, 2, MO_32);
3581 tcg_temp_free_i32(tmp3);
3582 tcg_gen_shri_i32(tmp2, tmp2, 16);
3583 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
3584 write_neon_element32(tmp2, a->vd, 3, MO_32);
3585 tcg_temp_free_i32(tmp2);
3586 tcg_temp_free_i32(ahp);
3587 tcg_temp_free_ptr(fpst);
3589 return true;
3592 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
3594 int vec_size = a->q ? 16 : 8;
3595 int rd_ofs = neon_full_reg_offset(a->vd);
3596 int rm_ofs = neon_full_reg_offset(a->vm);
3598 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3599 return false;
3602 /* UNDEF accesses to D16-D31 if they don't exist. */
3603 if (!dc_isar_feature(aa32_simd_r32, s) &&
3604 ((a->vd | a->vm) & 0x10)) {
3605 return false;
3608 if (a->size == 3) {
3609 return false;
3612 if ((a->vd | a->vm) & a->q) {
3613 return false;
3616 if (!vfp_access_check(s)) {
3617 return true;
3620 fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
3622 return true;
3625 #define DO_2MISC_VEC(INSN, FN) \
3626 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3628 return do_2misc_vec(s, a, FN); \
3631 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
3632 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
3633 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
3634 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
3635 DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
3636 DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
3637 DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
3639 static bool trans_VMVN(DisasContext *s, arg_2misc *a)
3641 if (a->size != 0) {
3642 return false;
3644 return do_2misc_vec(s, a, tcg_gen_gvec_not);
3647 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
3648 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
3649 uint32_t rm_ofs, uint32_t oprsz, \
3650 uint32_t maxsz) \
3652 tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \
3653 DATA, FUNC); \
3656 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \
3657 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
3658 uint32_t rm_ofs, uint32_t oprsz, \
3659 uint32_t maxsz) \
3661 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \
3664 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
3665 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
3666 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
3667 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
3668 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
3669 WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
3670 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
3672 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \
3673 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3675 if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \
3676 return false; \
3678 return do_2misc_vec(s, a, gen_##INSN); \
3681 DO_2M_CRYPTO(AESE, aa32_aes, 0)
3682 DO_2M_CRYPTO(AESD, aa32_aes, 0)
3683 DO_2M_CRYPTO(AESMC, aa32_aes, 0)
3684 DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
3685 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
3686 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
3687 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
3689 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
3691 TCGv_i32 tmp;
3692 int pass;
3694 /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
3695 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3696 return false;
3699 /* UNDEF accesses to D16-D31 if they don't exist. */
3700 if (!dc_isar_feature(aa32_simd_r32, s) &&
3701 ((a->vd | a->vm) & 0x10)) {
3702 return false;
3705 if (!fn) {
3706 return false;
3709 if ((a->vd | a->vm) & a->q) {
3710 return false;
3713 if (!vfp_access_check(s)) {
3714 return true;
3717 tmp = tcg_temp_new_i32();
3718 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3719 read_neon_element32(tmp, a->vm, pass, MO_32);
3720 fn(tmp, tmp);
3721 write_neon_element32(tmp, a->vd, pass, MO_32);
3723 tcg_temp_free_i32(tmp);
3725 return true;
3728 static bool trans_VREV32(DisasContext *s, arg_2misc *a)
3730 static NeonGenOneOpFn * const fn[] = {
3731 tcg_gen_bswap32_i32,
3732 gen_swap_half,
3733 NULL,
3734 NULL,
3736 return do_2misc(s, a, fn[a->size]);
3739 static bool trans_VREV16(DisasContext *s, arg_2misc *a)
3741 if (a->size != 0) {
3742 return false;
3744 return do_2misc(s, a, gen_rev16);
3747 static bool trans_VCLS(DisasContext *s, arg_2misc *a)
3749 static NeonGenOneOpFn * const fn[] = {
3750 gen_helper_neon_cls_s8,
3751 gen_helper_neon_cls_s16,
3752 gen_helper_neon_cls_s32,
3753 NULL,
3755 return do_2misc(s, a, fn[a->size]);
3758 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
3760 tcg_gen_clzi_i32(rd, rm, 32);
3763 static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
3765 static NeonGenOneOpFn * const fn[] = {
3766 gen_helper_neon_clz_u8,
3767 gen_helper_neon_clz_u16,
3768 do_VCLZ_32,
3769 NULL,
3771 return do_2misc(s, a, fn[a->size]);
3774 static bool trans_VCNT(DisasContext *s, arg_2misc *a)
3776 if (a->size != 0) {
3777 return false;
3779 return do_2misc(s, a, gen_helper_neon_cnt_u8);
3782 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3783 uint32_t oprsz, uint32_t maxsz)
3785 tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
3786 vece == MO_16 ? 0x7fff : 0x7fffffff,
3787 oprsz, maxsz);
3790 static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
3792 if (a->size == MO_16) {
3793 if (!dc_isar_feature(aa32_fp16_arith, s)) {
3794 return false;
3796 } else if (a->size != MO_32) {
3797 return false;
3799 return do_2misc_vec(s, a, gen_VABS_F);
3802 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3803 uint32_t oprsz, uint32_t maxsz)
3805 tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
3806 vece == MO_16 ? 0x8000 : 0x80000000,
3807 oprsz, maxsz);
3810 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
3812 if (a->size == MO_16) {
3813 if (!dc_isar_feature(aa32_fp16_arith, s)) {
3814 return false;
3816 } else if (a->size != MO_32) {
3817 return false;
3819 return do_2misc_vec(s, a, gen_VNEG_F);
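/*
 * Float abs/neg need no fp status: clearing or flipping the sign bit
 * with the masks above is exact, e.g. 0xc0490fdb (-pi as an f32) &
 * 0x7fffffff gives 0x40490fdb (+pi).
 */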
3822 static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
3824 if (a->size != 2) {
3825 return false;
3827 return do_2misc(s, a, gen_helper_recpe_u32);
3830 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
3832 if (a->size != 2) {
3833 return false;
3835 return do_2misc(s, a, gen_helper_rsqrte_u32);
3838 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
3839 static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \
3841 FUNC(d, cpu_env, m); \
3844 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
3845 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
3846 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
3847 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
3848 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
3849 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
3851 static bool trans_VQABS(DisasContext *s, arg_2misc *a)
3853 static NeonGenOneOpFn * const fn[] = {
3854 gen_VQABS_s8,
3855 gen_VQABS_s16,
3856 gen_VQABS_s32,
3857 NULL,
3859 return do_2misc(s, a, fn[a->size]);
3862 static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
3864 static NeonGenOneOpFn * const fn[] = {
3865 gen_VQNEG_s8,
3866 gen_VQNEG_s16,
3867 gen_VQNEG_s32,
3868 NULL,
3870 return do_2misc(s, a, fn[a->size]);
3873 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
3874 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
3875 uint32_t rm_ofs, \
3876 uint32_t oprsz, uint32_t maxsz) \
3878 static gen_helper_gvec_2_ptr * const fns[4] = { \
3879 NULL, HFUNC, SFUNC, NULL, \
3880 }; \
3881 TCGv_ptr fpst; \
3882 fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
3883 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \
3884 fns[vece]); \
3885 tcg_temp_free_ptr(fpst); \
3887 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3889 if (a->size == MO_16) { \
3890 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
3891 return false; \
3893 } else if (a->size != MO_32) { \
3894 return false; \
3896 return do_2misc_vec(s, a, gen_##INSN); \
3899 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
3900 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
3901 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
3902 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
3903 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
3904 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
3905 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
3906 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
3907 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
3908 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
3909 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
3911 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
3913 static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
3915 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3916 return false;
3918 return trans_VRINTX_impl(s, a);
3921 #define DO_VEC_RMODE(INSN, RMODE, OP) \
3922 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
3923 uint32_t rm_ofs, \
3924 uint32_t oprsz, uint32_t maxsz) \
3926 static gen_helper_gvec_2_ptr * const fns[4] = { \
3927 NULL, \
3928 gen_helper_gvec_##OP##h, \
3929 gen_helper_gvec_##OP##s, \
3930 NULL, \
3931 }; \
3932 TCGv_ptr fpst; \
3933 fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \
3934 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \
3935 arm_rmode_to_sf(RMODE), fns[vece]); \
3936 tcg_temp_free_ptr(fpst); \
3938 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3940 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \
3941 return false; \
3943 if (a->size == MO_16) { \
3944 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
3945 return false; \
3947 } else if (a->size != MO_32) { \
3948 return false; \
3950 return do_2misc_vec(s, a, gen_##INSN); \
3953 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
3954 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
3955 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
3956 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
3957 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
3958 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
3959 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
3960 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
3962 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
3963 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
3964 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
3965 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
3966 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
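/*
 * arm_rmode_to_sf() translates the FPROUNDING_* value into a softfloat
 * rounding-mode constant, passed to the helper via the gvec data word;
 * the helper is then expected to install that mode around the
 * conversion or rounding operation. So e.g. VCVTA* converts with
 * ties-away and VRINTZ rounds toward zero, regardless of FPSCR.RMode.
 */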
3968 static bool trans_VSWP(DisasContext *s, arg_2misc *a)
3970 TCGv_i64 rm, rd;
3971 int pass;
3973 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3974 return false;
3977 /* UNDEF accesses to D16-D31 if they don't exist. */
3978 if (!dc_isar_feature(aa32_simd_r32, s) &&
3979 ((a->vd | a->vm) & 0x10)) {
3980 return false;
3983 if (a->size != 0) {
3984 return false;
3987 if ((a->vd | a->vm) & a->q) {
3988 return false;
3991 if (!vfp_access_check(s)) {
3992 return true;
3995 rm = tcg_temp_new_i64();
3996 rd = tcg_temp_new_i64();
3997 for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3998 read_neon_element64(rm, a->vm, pass, MO_64);
3999 read_neon_element64(rd, a->vd, pass, MO_64);
4000 write_neon_element64(rm, a->vd, pass, MO_64);
4001 write_neon_element64(rd, a->vm, pass, MO_64);
4003 tcg_temp_free_i64(rm);
4004 tcg_temp_free_i64(rd);
4006 return true;
4008 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
4010 TCGv_i32 rd, tmp;
4012 rd = tcg_temp_new_i32();
4013 tmp = tcg_temp_new_i32();
4015 tcg_gen_shli_i32(rd, t0, 8);
4016 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
4017 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
4018 tcg_gen_or_i32(rd, rd, tmp);
4020 tcg_gen_shri_i32(t1, t1, 8);
4021 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
4022 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
4023 tcg_gen_or_i32(t1, t1, tmp);
4024 tcg_gen_mov_i32(t0, rd);
4026 tcg_temp_free_i32(tmp);
4027 tcg_temp_free_i32(rd);
4030 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4032 TCGv_i32 rd, tmp;
4034 rd = tcg_temp_new_i32();
4035 tmp = tcg_temp_new_i32();
4037 tcg_gen_shli_i32(rd, t0, 16);
4038 tcg_gen_andi_i32(tmp, t1, 0xffff);
4039 tcg_gen_or_i32(rd, rd, tmp);
4040 tcg_gen_shri_i32(t1, t1, 16);
4041 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4042 tcg_gen_or_i32(t1, t1, tmp);
4043 tcg_gen_mov_i32(t0, rd);
4045 tcg_temp_free_i32(tmp);
4046 tcg_temp_free_i32(rd);
4049 static bool trans_VTRN(DisasContext *s, arg_2misc *a)
4051 TCGv_i32 tmp, tmp2;
4052 int pass;
4054 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
4055 return false;
4058 /* UNDEF accesses to D16-D31 if they don't exist. */
4059 if (!dc_isar_feature(aa32_simd_r32, s) &&
4060 ((a->vd | a->vm) & 0x10)) {
4061 return false;
4064 if ((a->vd | a->vm) & a->q) {
4065 return false;
4068 if (a->size == 3) {
4069 return false;
4072 if (!vfp_access_check(s)) {
4073 return true;
4076 tmp = tcg_temp_new_i32();
4077 tmp2 = tcg_temp_new_i32();
4078 if (a->size == MO_32) {
4079 for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
4080 read_neon_element32(tmp, a->vm, pass, MO_32);
4081 read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
4082 write_neon_element32(tmp2, a->vm, pass, MO_32);
4083 write_neon_element32(tmp, a->vd, pass + 1, MO_32);
4085 } else {
4086 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
4087 read_neon_element32(tmp, a->vm, pass, MO_32);
4088 read_neon_element32(tmp2, a->vd, pass, MO_32);
4089 if (a->size == MO_8) {
4090 gen_neon_trn_u8(tmp, tmp2);
4091 } else {
4092 gen_neon_trn_u16(tmp, tmp2);
4094 write_neon_element32(tmp2, a->vm, pass, MO_32);
4095 write_neon_element32(tmp, a->vd, pass, MO_32);
4098 tcg_temp_free_i32(tmp);
4099 tcg_temp_free_i32(tmp2);
4100 return true;
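/*
 * VTRN treats the pair of registers as 2x2 matrices of elements and
 * transposes them: for size 2 the loop above just exchanges Vm[2i] with
 * Vd[2i+1], while gen_neon_trn_u8/u16 perform the same exchange on the
 * sub-word lanes within each 32-bit chunk.
 */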
4103 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
4105 if (!dc_isar_feature(aa32_i8mm, s)) {
4106 return false;
4108 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4109 gen_helper_gvec_smmla_b);
4112 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
4114 if (!dc_isar_feature(aa32_i8mm, s)) {
4115 return false;
4117 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4118 gen_helper_gvec_ummla_b);
4121 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
4123 if (!dc_isar_feature(aa32_i8mm, s)) {
4124 return false;
4126 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4127 gen_helper_gvec_usmmla_b);
4130 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
4132 if (!dc_isar_feature(aa32_bf16, s)) {
4133 return false;
4135 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4136 gen_helper_gvec_bfmmla);
4139 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
4141 if (!dc_isar_feature(aa32_bf16, s)) {
4142 return false;
4144 return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
4145 gen_helper_gvec_bfmlal);
4148 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
4150 if (!dc_isar_feature(aa32_bf16, s)) {
4151 return false;
4153 return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
4154 (a->index << 1) | a->q, FPST_STD,
4155 gen_helper_gvec_bfmlal_idx);
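/*
 * The indexed form packs (a->index << 1) | a->q into the gvec data word,
 * where the plain VFMA_b16 above passes just a->q; presumably
 * gen_helper_gvec_bfmlal_idx splits this back out to select the
 * bfloat16 scalar lane.
 */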