/*
 *  ARM translation: AArch32 Neon instructions
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *  Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}

static inline int rsub_64(DisasContext *s, int x)
{
    return 64 - x;
}

static inline int rsub_32(DisasContext *s, int x)
{
    return 32 - x;
}

static inline int rsub_16(DisasContext *s, int x)
{
    return 16 - x;
}

static inline int rsub_8(DisasContext *s, int x)
{
    return 8 - x;
}

static inline int neon_3same_fp_size(DisasContext *s, int x)
{
    /* Convert 0==fp32, 1==fp16 into a MO_* value */
    return MO_32 - x;
}
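/*
 * Note: the helpers above are not called directly from C; the generated
 * decoder references them via "!function=" modifiers in the .decode
 * files to turn an encoded field into the real operand value.  For
 * example (illustrative, assuming the usual decodetree syntax), a
 * right-shift count encoded as 64 - imm6 would be recovered with:
 *
 *     %neon_rshift_i6  16:6 !function=rsub_64
 */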
/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
    return ret;
}

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, cpu_env, offset);
        break;
    case MO_Q:
        tcg_gen_ld_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, cpu_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
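/*
 * The accessors above move a single vector element between the CPU
 * state and a TCG temporary; the MemOp encodes both the element size
 * and, for loads, the extension.  For example (illustrative):
 *
 *     neon_load_element(var, 5, 2, MO_UW);
 *
 * loads the zero-extended 16-bit element 2 of D5 into "var".
 */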
static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcmlah : gen_helper_gvec_fcmlas;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}
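/*
 * The ((a->vn | a->vm | a->vd) & a->q) test above is the recurring
 * "UNDEF if Q and any register number is odd" check: a 128-bit (Q)
 * operand must map to an even-numbered D register pair, so with
 * a->q == 1 the expression is nonzero exactly when some register
 * number is odd.
 */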
static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
{
    int opr_sz;
    gen_helper_gvec_3 *fn_gvec;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
    tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       opr_sz, opr_sz, 0, fn_gvec);
    return true;
}

static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    gen_helper_gvec_3_ptr *fn_gvec_ptr;
    int opr_sz;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcmlah_idx : gen_helper_gvec_fcmlas_idx;
    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz,
                       (a->index << 2) | a->rot, fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
{
    gen_helper_gvec_3 *fn_gvec;
    int opr_sz;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(FPST_STD);
    tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->rm),
                       opr_sz, opr_sz, a->index, fn_gvec);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       cpu_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};
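/*
 * The table is indexed by the "itype" field of the load/store multiple
 * encoding, and the total number of D registers transferred is
 * nregs * interleave.  For example (illustrative), itype 7 ({1, 1, 1})
 * is VLD1/VST1 with a single register, while itype 0 ({1, 4, 1}) is
 * VLD4/VST4 with the four structure elements interleaved across four
 * consecutive registers.
 */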
static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
}
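/*
 * This implements the architected base writeback forms: Rm == 15 means
 * no writeback, Rm == 13 means post-increment by the transfer size
 * ("[Rn]!"), and any other Rm is a register post-index ("[Rn], Rm").
 */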
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian. */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        align = pow2_align(a->align + 2); /* 4 ** a->align */
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    tmp = tcg_const_i32(1 << size);
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_add_i32(addr, addr, tmp);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(tmp64);

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}
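/*
 * A worked example of the little-endian promotion above (illustrative):
 * a VLD1.8 of one D register has interleave == 1, so size is promoted
 * to 3 and the inner loop performs a single 8-byte load instead of
 * eight 1-byte loads; the lanes land in the same places because both
 * views are little-endian.
 */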
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    align = 0;
    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        switch (nregs) {
        case 1:
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            return false;
        case 4:
            align = pow2_align(size + 2);
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
                             neon_full_reg_offset(vd), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}
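/*
 * tcg_gen_gvec_dup_i32() replicates the loaded element into every lane
 * of the destination, so e.g. (illustrative) VLD1.32 {d0[]}, [r0]
 * loads one word and duplicates it into both 32-bit lanes of d0.
 */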
static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    int reg;
    int nregs = a->n + 1;
    int vd = a->vd;
    TCGv_i32 addr, tmp;
    MemOp mop;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && (a->align == 1 || a->align == 2))) {
            return false;
        }
        break;
    case 3:
        if ((a->align & 1) != 0) {
            return false;
        }
        /* fall through */
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 4:
        if (a->size == 2 && a->align == 3) {
            return false;
        }
        break;
    default:
        abort();
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Pick up SCTLR settings */
    mop = finalize_memop(s, a->size);

    if (a->align) {
        MemOp align_op;

        switch (nregs) {
        case 1:
            /* For VLD1, use natural alignment. */
            align_op = MO_ALIGN;
            break;
        case 2:
            /* For VLD2, use double alignment. */
            align_op = pow2_align(a->size + 1);
            break;
        case 4:
            if (a->size == MO_32) {
                /*
                 * For VLD4.32, align = 1 is double alignment, align = 2 is
                 * quad alignment; align = 3 is rejected above.
                 */
                align_op = pow2_align(a->size + a->align);
            } else {
                /* For VLD4.8 and VLD4.16, we want quad alignment. */
                align_op = pow2_align(a->size + 2);
            }
            break;
        default:
            /* For VLD3, the alignment field is zero and rejected above. */
            g_assert_not_reached();
        }

        mop = (mop & ~MO_AMASK) | align_op;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}
static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }
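/*
 * Each DO_3SAME use below expands to a one-line trans function; for
 * example, DO_3SAME(VADD, tcg_gen_gvec_add) expands (modulo layout) to:
 *
 *     static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
 *     {
 *         return do_3same(s, a, tcg_gen_gvec_add);
 *     }
 */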
DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)

/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)

#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)

#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)

#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
    {                                                                      \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)

static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_3same(s, a, gen_VMUL_p_3s);
}

#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)

#define DO_SHA1(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha1, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)

#define DO_SHA2(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha2, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)

#define DO_3SAME_64(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 op = { .fni8 = FUNC };                    \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)

#define DO_3SAME_32(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

/*
 * Some helper functions need to be passed the cpu_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }

#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_##INSN##_tramp8 },                            \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)

static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
    /* Operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q is 0 there are always just two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    tmp = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp3 = tcg_temp_new_i32();

    read_neon_element32(tmp, a->vn, 0, MO_32);
    read_neon_element32(tmp2, a->vn, 1, MO_32);
    fn(tmp, tmp, tmp2);

    read_neon_element32(tmp3, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    fn(tmp3, tmp3, tmp2);

    write_neon_element32(tmp, a->vd, 0, MO_32);
    write_neon_element32(tmp3, a->vd, 1, MO_32);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(tmp2);
    tcg_temp_free_i32(tmp3);
    return true;
}

#define DO_3SAME_PAIR(INSN, func)                                       \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        static NeonGenTwoOpFn * const fns[] = {                         \
            gen_helper_neon_##func##8,                                  \
            gen_helper_neon_##func##16,                                 \
            gen_helper_neon_##func##32,                                 \
        };                                                              \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same_pair(s, a, fns[a->size]);                       \
    }

/* 32-bit pairwise ops end up the same as the elementwise versions. */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
#define gen_helper_neon_padd_u32  tcg_gen_add_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)
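/*
 * do_3same_pair() computes vd[0] = fn(vn[0], vn[1]) and
 * vd[1] = fn(vm[0], vm[1]); e.g. (illustrative) VPADD.I32 d0, d1, d2
 * leaves d0[0] = d1[0] + d1[1] and d0[1] = d2[0] + d2[1].
 */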
#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[2] = {                                \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)

#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rn_ofs, uint32_t rm_ofs,              \
                         uint32_t oprsz, uint32_t maxsz)                \
    {                                                                   \
        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
                           oprsz, maxsz, 0, FUNC);                      \
        tcg_temp_free_ptr(fpst);                                        \
    }

#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
    }

DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)

WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}

static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
                             gen_helper_gvec_3_ptr *fn)
{
    /* FP pairwise operations */
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpstatus, 8, 8, 0, fn);
    tcg_temp_free_ptr(fpstatus);

    return true;
}
/*
 * For all the functions using this macro, size == MO_16 means fp16,
 * which requires the fp16 arithmetic feature (aa32_fp16_arith).
 */
#define DO_3S_FP_PAIR(INSN,FUNC)                                        \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same_fp_pair(s, a, FUNC##h);                     \
        }                                                               \
        return do_3same_fp_pair(s, a, FUNC##s);                         \
    }

DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)

static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)

static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed shift out of range results in all-sign-bits */
    a->shift = MIN(a->shift, (8 << a->size) - 1);
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}
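/*
 * The clamp above works because an out-of-range signed shift produces
 * all sign bits: e.g. (illustrative) VSHR.S8 with shift 8 is generated
 * as an arithmetic shift by 7, which already leaves 0 or -1 in every
 * lane.
 */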
static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}

static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Shift out of range is architecturally valid and results in zero. */
    if (a->shift >= (8 << a->size)) {
        return do_vector_2sh(s, a, gen_zero_rd_2sh);
    } else {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }
}

static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwo64OpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size == 3 case, where the
     * function needs to be passed cpu_env.
     */
    TCGv_i64 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i64(dup_const(a->size, a->shift));

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 tmp = tcg_temp_new_i64();

        read_neon_element64(tmp, a->vm, pass, MO_64);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element64(tmp, a->vd, pass, MO_64);
        tcg_temp_free_i64(tmp);
    }
    tcg_temp_free_i64(constimm);
    return true;
}

static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwoOpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size < 3 case, where the
     * helper needs to be passed cpu_env.
     */
    TCGv_i32 constimm, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i32(dup_const(a->size, a->shift));
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(constimm);
    return true;
}

#define DO_2SHIFT_ENV(INSN, FUNC)                                       \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    {                                                                   \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
    }                                                                   \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        static NeonGenTwoOpEnvFn * const fns[] = {                      \
            gen_helper_neon_##FUNC##8,                                  \
            gen_helper_neon_##FUNC##16,                                 \
            gen_helper_neon_##FUNC##32,                                 \
        };                                                              \
        assert(a->size < ARRAY_SIZE(fns));                              \
        return do_2shift_env_32(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)

static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
    constimm = tcg_const_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();
    rd = tcg_temp_new_i32();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    read_neon_element64(rm1, a->vm, 0, MO_64);
    read_neon_element64(rm2, a->vm, 1, MO_64);

    shiftfn(rm1, rm1, constimm);
    narrowfn(rd, cpu_env, rm1);
    write_neon_element32(rd, a->vd, 0, MO_32);

    shiftfn(rm2, rm2, constimm);
    narrowfn(rd, cpu_env, rm2);
    write_neon_element32(rd, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd);
    tcg_temp_free_i64(rm1);
    tcg_temp_free_i64(rm2);
    tcg_temp_free_i64(constimm);

    return true;
}

static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_const_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = tcg_temp_new_i32();
    rm2 = tcg_temp_new_i32();
    rm3 = tcg_temp_new_i32();
    rm4 = tcg_temp_new_i32();
    read_neon_element32(rm1, a->vm, 0, MO_32);
    read_neon_element32(rm2, a->vm, 1, MO_32);
    read_neon_element32(rm3, a->vm, 2, MO_32);
    read_neon_element32(rm4, a->vm, 3, MO_32);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
    tcg_temp_free_i32(rm2);

    narrowfn(rm1, cpu_env, rtmp);
    write_neon_element32(rm1, a->vd, 0, MO_32);
    tcg_temp_free_i32(rm1);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);
    tcg_temp_free_i32(constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
    tcg_temp_free_i32(rm4);

    narrowfn(rm3, cpu_env, rtmp);
    tcg_temp_free_i64(rtmp);
    write_neon_element32(rm3, a->vd, 1, MO_32);
    tcg_temp_free_i32(rm3);
    return true;
}

#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }

static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}

DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)

static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }

    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    tcg_temp_free_i32(rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 0, MO_64);

    widenfn(tmp, rm1);
    tcg_temp_free_i32(rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 1, MO_64);
    tcg_temp_free_i64(tmp);
    return true;
}
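/*
 * A worked example of widen_mask above (illustrative): for VSHLL.S8
 * with shift == 3, esize == 8, so
 *     widen_mask = 0xff >> (8 - 3) = 0x07
 *     widen_mask = dup_const(MO_16, 0x07) = 0x0007000700070007
 * i.e. after shifting the packed 16-bit lanes left by 3, the low three
 * bits of each lane (contaminated by the neighbour's top bits) are
 * cleared.
 */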
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}

static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
                      gen_helper_gvec_2_ptr *fn)
{
    /* FP operations in 2-reg-and-shift group */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);
    TCGv_ptr fpst;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
    tcg_temp_free_ptr(fpst);
    return true;
}

#define DO_FP_2SH(INSN, FUNC)                                           \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_fp_2sh(s, a, FUNC);                                   \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)

static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
{
    /*
     * Expand the encoded constant.
     * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
     * We choose to not special-case this and will behave as if a
     * valid constant encoding of 0 had been given.
     * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
     */
    switch (cmode) {
    case 0: case 1:
        /* no-op */
        break;
    case 2: case 3:
        imm <<= 8;
        break;
    case 4: case 5:
        imm <<= 16;
        break;
    case 6: case 7:
        imm <<= 24;
        break;
    case 8: case 9:
        imm |= imm << 16;
        break;
    case 10: case 11:
        imm = (imm << 8) | (imm << 24);
        break;
    case 12:
        imm = (imm << 8) | 0xff;
        break;
    case 13:
        imm = (imm << 16) | 0xffff;
        break;
    case 14:
        if (op) {
            /*
             * This is the only case where the top and bottom 32 bits
             * of the encoded constant differ.
             */
            uint64_t imm64 = 0;
            int n;

            for (n = 0; n < 8; n++) {
                if (imm & (1 << n)) {
                    imm64 |= (0xffULL << (n * 8));
                }
            }
            return imm64;
        }
        imm |= (imm << 8) | (imm << 16) | (imm << 24);
        break;
    case 15:
        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
        break;
    }
    if (op) {
        imm = ~imm;
    }
    return dup_const(MO_32, imm);
}
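/*
 * Worked examples (illustrative):
 *   asimd_imm_const(0xab, 3, 0)  -> 0x0000ab000000ab00 (imm << 8 in
 *       each 32-bit lane)
 *   asimd_imm_const(0xa5, 14, 1) -> 0xff00ff0000ff00ff (each set bit
 *       of 0xa5 = 0b10100101 expands to a 0xff byte)
 *   asimd_imm_const(0x70, 15, 0) -> 0x3f8000003f800000 (fp32 1.0 in
 *       each 32-bit lane)
 */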
static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                        GVecGen2iFn *fn)
{
    uint64_t imm;
    int reg_ofs, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    reg_ofs = neon_full_reg_offset(a->vd);
    vec_size = a->q ? 16 : 8;
    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
    return true;
}

static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    GVecGen2iFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        /* for op=1, the imm will be inverted, so BIC becomes AND. */
        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        fn = gen_VMOV_1r;
    }
    return do_1reg_imm(s, a, fn);
}
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
                           NeonGenWidenFn *widenfn,
                           NeonGenTwo64OpFn *opfn,
                           int src1_mop, int src2_mop)
{
    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
    TCGv_i64 rn0_64, rn1_64, rm_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn0_64 = tcg_temp_new_i64();
    rn1_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();

    if (src1_mop >= 0) {
        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 0, MO_32);
        widenfn(rn0_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 0, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 0, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    opfn(rn0_64, rn0_64, rm_64);

    /*
     * Load second pass inputs before storing the first pass result, to
     * avoid incorrect results if a narrow input overlaps with the result.
     */
    if (src1_mop >= 0) {
        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 1, MO_32);
        widenfn(rn1_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 1, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 1, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);

    opfn(rn1_64, rn1_64, rm_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);

    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenWidenFn * const widenfn[] = {                     \
            gen_helper_neon_widen_##S##8,                               \
            gen_helper_neon_widen_##S##16,                              \
            NULL, NULL,                                                 \
        };                                                              \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
                              SRC1WIDE ? MO_Q : narrow_mop,             \
                              narrow_mop);                              \
    }

DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    read_neon_element64(rn_64, a->vn, 0, MO_64);
    read_neon_element64(rm_64, a->vm, 0, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    read_neon_element64(rn_64, a->vn, 1, MO_64);
    read_neon_element64(rm_64, a->vm, 1, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rn_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        static NeonGenNarrowFn * const narrowfn[] = {                   \
            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
            EXTOP,                                                      \
            NULL,                                                       \
        };                                                              \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
    }

static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
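/*
 * The "HN" insns keep only the high half of each double-width result;
 * the rounding variants first add half of the discarded range so the
 * high half is rounded rather than truncated.  E.g. for size == 2,
 * gen_narrow_round_high_u32() adds 1 << 31 before taking the top 32
 * bits of the 64-bit lane.
 */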
static bool do_long_3d(DisasContext *s, arg_3diff *a,
                       NeonGenTwoOpWidenFn *opfn,
                       NeonGenTwo64OpFn *accfn)
{
    /*
     * 3-regs different lengths, long operations.
     * These perform an operation on two inputs that returns a double-width
     * result, and then possibly perform an accumulation operation of
     * that result into the double-width destination.
     */
    TCGv_i64 rd0, rd1, tmp;
    TCGv_i32 rn, rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd0 = tcg_temp_new_i64();
    rd1 = tcg_temp_new_i64();

    rn = tcg_temp_new_i32();
    rm = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    read_neon_element32(rm, a->vm, 0, MO_32);
    opfn(rd0, rn, rm);

    read_neon_element32(rn, a->vn, 1, MO_32);
    read_neon_element32(rm, a->vm, 1, MO_32);
    opfn(rd1, rn, rm);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(rm);

    /* Don't store results until after all loads: they might overlap */
    if (accfn) {
        tmp = tcg_temp_new_i64();
        read_neon_element64(tmp, a->vd, 0, MO_64);
        accfn(rd0, tmp, rd0);
        read_neon_element64(tmp, a->vd, 1, MO_64);
        accfn(rd1, tmp, rd1);
        tcg_temp_free_i64(tmp);
    }

    write_neon_element64(rd0, a->vd, 0, MO_64);
    write_neon_element64(rd1, a->vd, 1, MO_64);
    tcg_temp_free_i64(rd0);
    tcg_temp_free_i64(rd1);
    return true;
}
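/*
 * VABDL computes the widened absolute difference of each element pair;
 * VABAL is the same operation plus accumulation of the result into Vd,
 * supplied to do_long_3d as the accfn.
 */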
static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}
static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}
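/*
 * Full 64-bit product of two 32-bit operands: muls2/mulu2 produce the
 * low and high 32-bit halves of the product, which are then
 * concatenated into the 64-bit destination.
 */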
static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_muls2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}

static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_mulu2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}
static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_s8,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_u8,
        gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}
#define DO_VMLAL(INSN,MULL,ACC)                                         \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            gen_helper_neon_##MULL##8,                                  \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            gen_helper_neon_##ACC##l_u16,                               \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
    }

DO_VMLAL(VMLAL_S,mull_s,add)
DO_VMLAL(VMLAL_U,mull_u,add)
DO_VMLAL(VMLSL_S,mull_s,sub)
DO_VMLAL(VMLSL_U,mull_u,sub)
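/*
 * VQDMULL: the doubling is implemented as a saturating add of the
 * widened product to itself, so e.g. INT16_MIN * INT16_MIN doubled
 * saturates (and sets QC) rather than wrapping around.
 */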
static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_helper_neon_mull_s16(rd, rn, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
}

static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_mull_s32(rd, rn, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
}

static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}
static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}
static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_negl_u32(rm, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_neg_i64(rm, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}
static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
{
    gen_helper_gvec_3 *fn_gvec;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    switch (a->size) {
    case 0:
        fn_gvec = gen_helper_neon_pmull_h;
        break;
    case 2:
        if (!dc_isar_feature(aa32_pmull, s)) {
            return false;
        }
        fn_gvec = gen_helper_gvec_pmull_q;
        break;
    default:
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
                       neon_full_reg_offset(a->vn),
                       neon_full_reg_offset(a->vm),
                       16, 16, 0, fn_gvec);
    return true;
}
static void gen_neon_dup_low16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_shli_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

static void gen_neon_dup_high16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_andi_i32(var, var, 0xffff0000);
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}
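/*
 * Fetch the scalar operand of a "two registers and a scalar" insn.
 * 'reg' is the combined M:Vm field: for 32-bit elements bits [3:0]
 * name the D register and bit 4 selects the element; for 16-bit
 * elements bits [2:0] name the D register, bit 4 picks the 32-bit
 * half and bit 3 the 16-bit element within it, which is then
 * duplicated into both halves of the returned 32-bit value.
 */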
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (size == MO_16) {
        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
        if (reg & 8) {
            gen_neon_dup_high16(tmp);
        } else {
            gen_neon_dup_low16(tmp);
        }
    } else {
        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
    }
    return tmp;
}
static bool do_2scalar(DisasContext *s, arg_2scalar *a,
                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
{
    /*
     * Two registers and a scalar: perform an operation between
     * the input elements and the scalar, and then possibly
     * perform an accumulation operation of that result into the
     * destination.
     */
    TCGv_i32 scalar, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vn, pass, MO_32);
        opfn(tmp, tmp, scalar);
        if (accfn) {
            TCGv_i32 rd = tcg_temp_new_i32();
            read_neon_element32(rd, a->vd, pass, MO_32);
            accfn(tmp, rd, tmp);
            tcg_temp_free_i32(rd);
        }
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(scalar);
    return true;
}
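/*
 * A sketch of the by-scalar semantics: VMLA.I32 d0, d1, d2[1]
 * computes d0[i] += d1[i] * d2[1] for each pass, using
 * tcg_gen_mul_i32 as the opfn and tcg_gen_add_i32 as the accfn.
 */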
static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_add_u16,
        tcg_gen_add_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_sub_u16,
        tcg_gen_sub_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}
static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
                              gen_helper_gvec_3_ptr *fn)
{
    /* Two registers and a scalar, using gvec */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs;
    int idx;
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* a->vm is M:Vm, which encodes both register and index */
    idx = extract32(a->vm, a->size + 2, 2);
    a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_full_reg_offset(a->vm);

    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    tcg_temp_free_ptr(fpstatus);
    return true;
}
#define DO_VMUL_F_2sc(NAME, FUNC)                                       \
    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
    {                                                                   \
        static gen_helper_gvec_3_ptr * const opfn[] = {                 \
            NULL,                                                       \
            gen_helper_##FUNC##_h,                                      \
            gen_helper_##FUNC##_s,                                      \
            NULL,                                                       \
        };                                                              \
        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
            return false;                                               \
        }                                                               \
        return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
    }

DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)

static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQDMULH_16,
        gen_VQDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQRDMULH_16,
        gen_VQRDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}
static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
     */
    TCGv_i32 scalar, rn, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (!dc_isar_feature(aa32_rdm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    rn = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(rn, a->vn, pass, MO_32);
        read_neon_element32(rd, a->vd, pass, MO_32);
        opfn(rd, cpu_env, rn, scalar, rd);
        write_neon_element32(rd, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(rd);
    tcg_temp_free_i32(scalar);

    return true;
}
static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlah_s16,
        gen_helper_neon_qrdmlah_s32,
        NULL,
    };
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}

static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlsh_s16,
        gen_helper_neon_qrdmlsh_s32,
        NULL,
    };
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}
static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
                            NeonGenTwoOpWidenFn *opfn,
                            NeonGenTwo64OpFn *accfn)
{
    /*
     * Two registers and a scalar, long operations: perform an
     * operation on the input elements and the scalar which produces
     * a double-width result, and then possibly perform an accumulation
     * operation of that result into the destination.
     */
    TCGv_i32 scalar, rn;
    TCGv_i64 rn0_64, rn1_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    /* Load all inputs before writing any outputs, in case of overlap */
    rn = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    rn0_64 = tcg_temp_new_i64();
    opfn(rn0_64, rn, scalar);

    read_neon_element32(rn, a->vn, 1, MO_32);
    rn1_64 = tcg_temp_new_i64();
    opfn(rn1_64, rn, scalar);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(scalar);

    if (accfn) {
        TCGv_i64 t64 = tcg_temp_new_i64();
        read_neon_element64(t64, a->vd, 0, MO_64);
        accfn(rn0_64, t64, rn0_64);
        read_neon_element64(t64, a->vd, 1, MO_64);
        accfn(rn1_64, t64, rn1_64);
        tcg_temp_free_i64(t64);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);
    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    return true;
}
static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}
#define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            NULL,                                                       \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            NULL,                                                       \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
    }

DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}
static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (a->imm > 7 && !a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }
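    /*
     * E.g. VEXT.8 d0, d1, d2, #3 concatenates <d2:d1> and takes bytes
     * 3..10, i.e. the top five bytes of d1 followed by the bottom three
     * bytes of d2; tcg_gen_extract2_i64 performs exactly this
     * shift-and-merge.
     */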
    if (!a->q) {
        /* Extract 64 bits from <Vm:Vn> */
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        read_neon_element64(right, a->vn, 0, MO_64);
        read_neon_element64(left, a->vm, 0, MO_64);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        write_neon_element64(dest, a->vd, 0, MO_64);

        tcg_temp_free_i64(left);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(dest);
    } else {
        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

        if (a->imm < 8) {
            read_neon_element64(right, a->vn, 0, MO_64);
            read_neon_element64(middle, a->vn, 1, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            read_neon_element64(left, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
        } else {
            read_neon_element64(right, a->vn, 1, MO_64);
            read_neon_element64(middle, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            read_neon_element64(left, a->vm, 1, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
        }

        write_neon_element64(destright, a->vd, 0, MO_64);
        write_neon_element64(destleft, a->vd, 1, MO_64);

        tcg_temp_free_i64(destright);
        tcg_temp_free_i64(destleft);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(middle);
        tcg_temp_free_i64(left);
    }
    return true;
}
static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
{
    TCGv_i64 val, def;
    TCGv_i32 desc;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn + a->len + 1) > 32) {
        /*
         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
         * helper function running off the end of the register file.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32((a->vn << 2) | a->len);
    def = tcg_temp_new_i64();
    if (a->op) {
        read_neon_element64(def, a->vd, 0, MO_64);
    } else {
        tcg_gen_movi_i64(def, 0);
    }
    val = tcg_temp_new_i64();
    read_neon_element64(val, a->vm, 0, MO_64);

    gen_helper_neon_tbl(val, cpu_env, desc, val, def);
    write_neon_element64(val, a->vd, 0, MO_64);

    tcg_temp_free_i64(def);
    tcg_temp_free_i64(val);
    tcg_temp_free_i32(desc);
    return true;
}
static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
                         neon_element_offset(a->vm, a->index, a->size),
                         a->q ? 16 : 8, a->q ? 16 : 8);
    return true;
}
static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
{
    int pass, half;
    TCGv_i32 tmp[2];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp[0] = tcg_temp_new_i32();
    tmp[1] = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        for (half = 0; half < 2; half++) {
            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
            switch (a->size) {
            case 0:
                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
                break;
            case 1:
                gen_swap_half(tmp[half], tmp[half]);
                break;
            case 2:
                break;
            default:
                g_assert_not_reached();
            }
        }
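        /*
         * Writing the two 32-bit halves of each 64-bit lane back in
         * swapped order, combined with the byte/halfword swap done
         * above, reverses the order of the size-wide elements.
         */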
        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
    }

    tcg_temp_free_i32(tmp[0]);
    tcg_temp_free_i32(tmp[1]);
    return true;
}
static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
                              NeonGenWidenFn *widenfn,
                              NeonGenTwo64OpFn *opfn,
                              NeonGenTwo64OpFn *accfn)
{
    /*
     * Pairwise long operations: widen both halves of the pair,
     * combine the pairs with the opfn, and then possibly accumulate
     * into the destination with the accfn.
     */
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i32 tmp;
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();

        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
        widenfn(rm0_64, tmp);
        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
        widenfn(rm1_64, tmp);
        tcg_temp_free_i32(tmp);

        opfn(rd_64, rm0_64, rm1_64);
        tcg_temp_free_i64(rm0_64);
        tcg_temp_free_i64(rm1_64);

        if (accfn) {
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            read_neon_element64(tmp64, a->vd, pass, MO_64);
            accfn(rd_64, tmp64, rd_64);
            tcg_temp_free_i64(tmp64);
        }
        write_neon_element64(rd_64, a->vd, pass, MO_64);
        tcg_temp_free_i64(rd_64);
    }
    return true;
}
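/*
 * E.g. VPADDL.S16 d0, d1 widens the four 16-bit elements of d1 to
 * 32 bits and adds adjacent pairs, producing two 32-bit results;
 * VPADAL additionally accumulates those sums into d0.
 */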
static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}

static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}
typedef void ZipFn(TCGv_ptr, TCGv_ptr);

static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
                       ZipFn *fn)
{
    TCGv_ptr pd, pm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!fn) {
        /* Bad size or size/q combination */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    fn(pd, pm);
    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(pm);
    return true;
}
static bool trans_VUZP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_unzip8,
            gen_helper_neon_unzip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qunzip8,
            gen_helper_neon_qunzip16,
            gen_helper_neon_qunzip32,
            NULL,
        },
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}

static bool trans_VZIP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_zip8,
            gen_helper_neon_zip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qzip8,
            gen_helper_neon_qzip16,
            gen_helper_neon_qzip32,
            NULL,
        },
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}
static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenNarrowEnvFn *narrowfn)
{
    TCGv_i64 rm;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!narrowfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    read_neon_element64(rm, a->vm, 0, MO_64);
    narrowfn(rd0, cpu_env, rm);
    read_neon_element64(rm, a->vm, 1, MO_64);
    narrowfn(rd1, cpu_env, rm);
    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);
    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rm);
    return true;
}
#define DO_VMOVN(INSN, FUNC)                                    \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        static NeonGenNarrowEnvFn * const narrowfn[] = {        \
            FUNC##8,                                            \
            FUNC##16,                                           \
            FUNC##32,                                           \
            NULL,                                               \
        };                                                      \
        return do_vmovn(s, a, narrowfn[a->size]);               \
    }

DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
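/*
 * VSHLL (maximum-shift form): widen each input element to double
 * width and then shift left by the input element size in bits.
 */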
static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 rm0, rm1;
    TCGv_i64 rd;
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd = tcg_temp_new_i64();
    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();

    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);

    widenfn(rd, rm0);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 0, MO_64);
    widenfn(rd, rm1);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 1, MO_64);

    tcg_temp_free_i64(rd);
    tcg_temp_free_i32(rm0);
    tcg_temp_free_i32(rm1);
    return true;
}
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }
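    /*
     * All four source elements are read before the first write to Vd,
     * since Vd and Vm may overlap; pairs of f16 results are packed
     * into each 32-bit destination word.
     */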
    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = tcg_temp_new_i32();
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    read_neon_element32(tmp, a->vm, 2, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = tcg_temp_new_i32();
    read_neon_element32(tmp3, a->vm, 3, MO_32);
    write_neon_element32(tmp2, a->vd, 0, MO_32);
    tcg_temp_free_i32(tmp2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    write_neon_element32(tmp3, a->vd, 1, MO_32);
    tcg_temp_free_i32(tmp3);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 0, MO_32);
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    write_neon_element32(tmp, a->vd, 1, MO_32);
    tcg_temp_free_i32(tmp);
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 2, MO_32);
    tcg_temp_free_i32(tmp3);
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    write_neon_element32(tmp2, a->vd, 3, MO_32);
    tcg_temp_free_i32(tmp2);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

    return true;
}
#define DO_2MISC_VEC(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_vec(s, a, FN);                          \
    }

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
static bool trans_VMVN(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc_vec(s, a, tcg_gen_gvec_not);
}
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
                           DATA, FUNC);                                 \
    }

#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
    }
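/*
 * The DATA immediate is passed through to the crypto helper in the
 * simd descriptor, where it selects the paired variant: encrypt vs
 * decrypt for AESE/AESD, MixColumns vs InverseMixColumns for
 * AESMC/AESIMC.
 */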
WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
            return false;                                       \
        }                                                       \
        return do_2misc_vec(s, a, gen_##INSN);                  \
    }

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    TCGv_i32 tmp;
    int pass;

    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, tmp);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);

    return true;
}
static bool trans_VREV32(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        tcg_gen_bswap32_i32,
        gen_swap_half,
        NULL,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VREV16(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_rev16);
}

static bool trans_VCLS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_cls_s8,
        gen_helper_neon_cls_s16,
        gen_helper_neon_cls_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
{
    tcg_gen_clzi_i32(rd, rm, 32);
}

static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_clz_u8,
        gen_helper_neon_clz_u16,
        do_VCLZ_32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VCNT(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_helper_neon_cnt_u8);
}
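/*
 * Float abs/neg are pure bit operations: clear or flip the sign bit
 * of each element, so they can be implemented as a vector AND/XOR of
 * an immediate and never raise FP exceptions.
 */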
static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x7fff : 0x7fffffff,
                      oprsz, maxsz);
}

static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
{
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    } else if (a->size != MO_32) {
        return false;
    }
    return do_2misc_vec(s, a, gen_VABS_F);
}

static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x8000 : 0x80000000,
                      oprsz, maxsz);
}

static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
{
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    } else if (a->size != MO_32) {
        return false;
    }
    return do_2misc_vec(s, a, gen_VNEG_F);
}
static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_recpe_u32);
}

static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_rsqrte_u32);
}
#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC)                 \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
    {                                                   \
        FUNC(d, cpu_env, m);                            \
    }

WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
static bool trans_VQABS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQABS_s8,
        gen_VQABS_s16,
        gen_VQABS_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQNEG_s8,
        gen_VQNEG_s16,
        gen_VQNEG_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}
#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL, HFUNC, SFUNC, NULL,                                   \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
                           fns[vece]);                                  \
        tcg_temp_free_ptr(fpst);                                        \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)

DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)

static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }
    return trans_VRINTX_impl(s, a);
}
#define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL,                                                       \
            gen_helper_gvec_##OP##h,                                    \
            gen_helper_gvec_##OP##s,                                    \
            NULL,                                                       \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
                           arm_rmode_to_sf(RMODE), fns[vece]);          \
        tcg_temp_free_ptr(fpst);                                        \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
            return false;                                               \
        }                                                               \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)

DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
static bool trans_VSWP(DisasContext *s, arg_2misc *a)
{
    TCGv_i64 rm, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size != 0) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd = tcg_temp_new_i64();
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        read_neon_element64(rm, a->vm, pass, MO_64);
        read_neon_element64(rd, a->vd, pass, MO_64);
        write_neon_element64(rm, a->vd, pass, MO_64);
        write_neon_element64(rd, a->vm, pass, MO_64);
    }
    tcg_temp_free_i64(rm);
    tcg_temp_free_i64(rd);

    return true;
}
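/*
 * Helpers for VTRN: within each 32-bit chunk, exchange the odd
 * elements of one input with the even elements of the other, i.e.
 * transpose the 2x2 matrices formed by adjacent element pairs.
 */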
static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 8);
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
    tcg_gen_or_i32(rd, rd, tmp);

    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);
}

static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 16);
    tcg_gen_andi_i32(tmp, t1, 0xffff);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shri_i32(t1, t1, 16);
    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);
}
static bool trans_VTRN(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 tmp, tmp2;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    if (a->size == MO_32) {
        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
            read_neon_element32(tmp, a->vm, pass, MO_32);
            read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
            write_neon_element32(tmp2, a->vm, pass, MO_32);
            write_neon_element32(tmp, a->vd, pass + 1, MO_32);
        }
    } else {
        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
            read_neon_element32(tmp, a->vm, pass, MO_32);
            read_neon_element32(tmp2, a->vd, pass, MO_32);
            if (a->size == MO_8) {
                gen_neon_trn_u8(tmp, tmp2);
            } else {
                gen_neon_trn_u16(tmp, tmp2);
            }
            write_neon_element32(tmp2, a->vm, pass, MO_32);
            write_neon_element32(tmp, a->vd, pass, MO_32);
        }
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(tmp2);
    return true;
}