/*
 * ARM translation: AArch32 Neon instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */
29 static inline int plus1(DisasContext
*s
, int x
)
34 /* Include the generated Neon decoder */
35 #include "decode-neon-dp.inc.c"
36 #include "decode-neon-ls.inc.c"
37 #include "decode-neon-shared.inc.c"
39 static bool trans_VCMLA(DisasContext
*s
, arg_VCMLA
*a
)
43 gen_helper_gvec_3_ptr
*fn_gvec_ptr
;
45 if (!dc_isar_feature(aa32_vcma
, s
)
46 || (!a
->size
&& !dc_isar_feature(aa32_fp16_arith
, s
))) {
50 /* UNDEF accesses to D16-D31 if they don't exist. */
51 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
52 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
56 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
60 if (!vfp_access_check(s
)) {
64 opr_sz
= (1 + a
->q
) * 8;
65 fpst
= get_fpstatus_ptr(1);
66 fn_gvec_ptr
= a
->size
? gen_helper_gvec_fcmlas
: gen_helper_gvec_fcmlah
;
67 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
68 vfp_reg_offset(1, a
->vn
),
69 vfp_reg_offset(1, a
->vm
),
70 fpst
, opr_sz
, opr_sz
, a
->rot
,
72 tcg_temp_free_ptr(fpst
);
76 static bool trans_VCADD(DisasContext
*s
, arg_VCADD
*a
)
80 gen_helper_gvec_3_ptr
*fn_gvec_ptr
;
82 if (!dc_isar_feature(aa32_vcma
, s
)
83 || (!a
->size
&& !dc_isar_feature(aa32_fp16_arith
, s
))) {
87 /* UNDEF accesses to D16-D31 if they don't exist. */
88 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
89 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
93 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
97 if (!vfp_access_check(s
)) {
101 opr_sz
= (1 + a
->q
) * 8;
102 fpst
= get_fpstatus_ptr(1);
103 fn_gvec_ptr
= a
->size
? gen_helper_gvec_fcadds
: gen_helper_gvec_fcaddh
;
104 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
105 vfp_reg_offset(1, a
->vn
),
106 vfp_reg_offset(1, a
->vm
),
107 fpst
, opr_sz
, opr_sz
, a
->rot
,
109 tcg_temp_free_ptr(fpst
);
113 static bool trans_VDOT(DisasContext
*s
, arg_VDOT
*a
)
116 gen_helper_gvec_3
*fn_gvec
;
118 if (!dc_isar_feature(aa32_dp
, s
)) {
122 /* UNDEF accesses to D16-D31 if they don't exist. */
123 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
124 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
128 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
132 if (!vfp_access_check(s
)) {
136 opr_sz
= (1 + a
->q
) * 8;
137 fn_gvec
= a
->u
? gen_helper_gvec_udot_b
: gen_helper_gvec_sdot_b
;
138 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a
->vd
),
139 vfp_reg_offset(1, a
->vn
),
140 vfp_reg_offset(1, a
->vm
),
141 opr_sz
, opr_sz
, 0, fn_gvec
);
145 static bool trans_VFML(DisasContext
*s
, arg_VFML
*a
)
149 if (!dc_isar_feature(aa32_fhm
, s
)) {
153 /* UNDEF accesses to D16-D31 if they don't exist. */
154 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
163 if (!vfp_access_check(s
)) {
167 opr_sz
= (1 + a
->q
) * 8;
168 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
169 vfp_reg_offset(a
->q
, a
->vn
),
170 vfp_reg_offset(a
->q
, a
->vm
),
171 cpu_env
, opr_sz
, opr_sz
, a
->s
, /* is_2 == 0 */
172 gen_helper_gvec_fmlal_a32
);
176 static bool trans_VCMLA_scalar(DisasContext
*s
, arg_VCMLA_scalar
*a
)
178 gen_helper_gvec_3_ptr
*fn_gvec_ptr
;
182 if (!dc_isar_feature(aa32_vcma
, s
)) {
185 if (a
->size
== 0 && !dc_isar_feature(aa32_fp16_arith
, s
)) {
189 /* UNDEF accesses to D16-D31 if they don't exist. */
190 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
191 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
195 if ((a
->vd
| a
->vn
) & a
->q
) {
199 if (!vfp_access_check(s
)) {
203 fn_gvec_ptr
= (a
->size
? gen_helper_gvec_fcmlas_idx
204 : gen_helper_gvec_fcmlah_idx
);
205 opr_sz
= (1 + a
->q
) * 8;
206 fpst
= get_fpstatus_ptr(1);
207 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
208 vfp_reg_offset(1, a
->vn
),
209 vfp_reg_offset(1, a
->vm
),
210 fpst
, opr_sz
, opr_sz
,
211 (a
->index
<< 2) | a
->rot
, fn_gvec_ptr
);
212 tcg_temp_free_ptr(fpst
);
216 static bool trans_VDOT_scalar(DisasContext
*s
, arg_VDOT_scalar
*a
)
218 gen_helper_gvec_3
*fn_gvec
;
222 if (!dc_isar_feature(aa32_dp
, s
)) {
226 /* UNDEF accesses to D16-D31 if they don't exist. */
227 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
228 ((a
->vd
| a
->vn
) & 0x10)) {
232 if ((a
->vd
| a
->vn
) & a
->q
) {
236 if (!vfp_access_check(s
)) {
240 fn_gvec
= a
->u
? gen_helper_gvec_udot_idx_b
: gen_helper_gvec_sdot_idx_b
;
241 opr_sz
= (1 + a
->q
) * 8;
242 fpst
= get_fpstatus_ptr(1);
243 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a
->vd
),
244 vfp_reg_offset(1, a
->vn
),
245 vfp_reg_offset(1, a
->rm
),
246 opr_sz
, opr_sz
, a
->index
, fn_gvec
);
247 tcg_temp_free_ptr(fpst
);
251 static bool trans_VFML_scalar(DisasContext
*s
, arg_VFML_scalar
*a
)
255 if (!dc_isar_feature(aa32_fhm
, s
)) {
259 /* UNDEF accesses to D16-D31 if they don't exist. */
260 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
261 ((a
->vd
& 0x10) || (a
->q
&& (a
->vn
& 0x10)))) {
269 if (!vfp_access_check(s
)) {
273 opr_sz
= (1 + a
->q
) * 8;
274 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
275 vfp_reg_offset(a
->q
, a
->vn
),
276 vfp_reg_offset(a
->q
, a
->rm
),
277 cpu_env
, opr_sz
, opr_sz
,
278 (a
->index
<< 2) | a
->s
, /* is_2 == 0 */
279 gen_helper_gvec_fmlal_idx_a32
);
/*
 * Table describing the Neon load/store element-structure encodings,
 * indexed by the 4-bit "itype" field (values 0..10 are valid; callers
 * check a->itype before indexing). For each encoding: number of
 * registers transferred per element group, the interleave factor,
 * and the register spacing.
 * NOTE(review): row values reconstructed from upstream QEMU; verify
 * against target/arm history before relying on them.
 */
static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1},
};
301 static void gen_neon_ldst_base_update(DisasContext
*s
, int rm
, int rn
,
307 base
= load_reg(s
, rn
);
309 tcg_gen_addi_i32(base
, base
, stride
);
312 index
= load_reg(s
, rm
);
313 tcg_gen_add_i32(base
, base
, index
);
314 tcg_temp_free_i32(index
);
316 store_reg(s
, rn
, base
);
320 static bool trans_VLDST_multiple(DisasContext
*s
, arg_VLDST_multiple
*a
)
322 /* Neon load/store multiple structures */
323 int nregs
, interleave
, spacing
, reg
, n
;
324 MemOp endian
= s
->be_data
;
325 int mmu_idx
= get_mem_index(s
);
330 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
334 /* UNDEF accesses to D16-D31 if they don't exist */
335 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
& 0x10)) {
341 /* Catch UNDEF cases for bad values of align field */
342 switch (a
->itype
& 0xc) {
356 nregs
= neon_ls_element_type
[a
->itype
].nregs
;
357 interleave
= neon_ls_element_type
[a
->itype
].interleave
;
358 spacing
= neon_ls_element_type
[a
->itype
].spacing
;
359 if (size
== 3 && (interleave
| spacing
) != 1) {
363 if (!vfp_access_check(s
)) {
367 /* For our purposes, bytes are always little-endian. */
372 * Consecutive little-endian elements from a single register
373 * can be promoted to a larger little-endian operation.
375 if (interleave
== 1 && endian
== MO_LE
) {
378 tmp64
= tcg_temp_new_i64();
379 addr
= tcg_temp_new_i32();
380 tmp
= tcg_const_i32(1 << size
);
381 load_reg_var(s
, addr
, a
->rn
);
382 for (reg
= 0; reg
< nregs
; reg
++) {
383 for (n
= 0; n
< 8 >> size
; n
++) {
385 for (xs
= 0; xs
< interleave
; xs
++) {
386 int tt
= a
->vd
+ reg
+ spacing
* xs
;
389 gen_aa32_ld_i64(s
, tmp64
, addr
, mmu_idx
, endian
| size
);
390 neon_store_element64(tt
, n
, size
, tmp64
);
392 neon_load_element64(tmp64
, tt
, n
, size
);
393 gen_aa32_st_i64(s
, tmp64
, addr
, mmu_idx
, endian
| size
);
395 tcg_gen_add_i32(addr
, addr
, tmp
);
399 tcg_temp_free_i32(addr
);
400 tcg_temp_free_i32(tmp
);
401 tcg_temp_free_i64(tmp64
);
403 gen_neon_ldst_base_update(s
, a
->rm
, a
->rn
, nregs
* interleave
* 8);
407 static bool trans_VLD_all_lanes(DisasContext
*s
, arg_VLD_all_lanes
*a
)
409 /* Neon load single structure to all lanes */
410 int reg
, stride
, vec_size
;
413 int nregs
= a
->n
+ 1;
416 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
420 /* UNDEF accesses to D16-D31 if they don't exist */
421 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
& 0x10)) {
426 if (nregs
!= 4 || a
->a
== 0) {
429 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
432 if (nregs
== 1 && a
->a
== 1 && size
== 0) {
435 if (nregs
== 3 && a
->a
== 1) {
439 if (!vfp_access_check(s
)) {
444 * VLD1 to all lanes: T bit indicates how many Dregs to write.
445 * VLD2/3/4 to all lanes: T bit indicates register stride.
447 stride
= a
->t
? 2 : 1;
448 vec_size
= nregs
== 1 ? stride
* 8 : 8;
450 tmp
= tcg_temp_new_i32();
451 addr
= tcg_temp_new_i32();
452 load_reg_var(s
, addr
, a
->rn
);
453 for (reg
= 0; reg
< nregs
; reg
++) {
454 gen_aa32_ld_i32(s
, tmp
, addr
, get_mem_index(s
),
456 if ((vd
& 1) && vec_size
== 16) {
458 * We cannot write 16 bytes at once because the
459 * destination is unaligned.
461 tcg_gen_gvec_dup_i32(size
, neon_reg_offset(vd
, 0),
463 tcg_gen_gvec_mov(0, neon_reg_offset(vd
+ 1, 0),
464 neon_reg_offset(vd
, 0), 8, 8);
466 tcg_gen_gvec_dup_i32(size
, neon_reg_offset(vd
, 0),
467 vec_size
, vec_size
, tmp
);
469 tcg_gen_addi_i32(addr
, addr
, 1 << size
);
472 tcg_temp_free_i32(tmp
);
473 tcg_temp_free_i32(addr
);
475 gen_neon_ldst_base_update(s
, a
->rm
, a
->rn
, (1 << size
) * nregs
);
480 static bool trans_VLDST_single(DisasContext
*s
, arg_VLDST_single
*a
)
482 /* Neon load/store single structure to one lane */
484 int nregs
= a
->n
+ 1;
488 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
492 /* UNDEF accesses to D16-D31 if they don't exist */
493 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
& 0x10)) {
497 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
500 if (((a
->align
& (1 << a
->size
)) != 0) ||
501 (a
->size
== 2 && ((a
->align
& 3) == 1 || (a
->align
& 3) == 2))) {
506 if ((a
->align
& 1) != 0) {
511 if (a
->size
== 2 && (a
->align
& 2) != 0) {
516 if ((a
->size
== 2) && ((a
->align
& 3) == 3)) {
523 if ((vd
+ a
->stride
* (nregs
- 1)) > 31) {
525 * Attempts to write off the end of the register file are
526 * UNPREDICTABLE; we choose to UNDEF because otherwise we would
527 * access off the end of the array that holds the register data.
532 if (!vfp_access_check(s
)) {
536 tmp
= tcg_temp_new_i32();
537 addr
= tcg_temp_new_i32();
538 load_reg_var(s
, addr
, a
->rn
);
540 * TODO: if we implemented alignment exceptions, we should check
541 * addr against the alignment encoded in a->align here.
543 for (reg
= 0; reg
< nregs
; reg
++) {
545 gen_aa32_ld_i32(s
, tmp
, addr
, get_mem_index(s
),
546 s
->be_data
| a
->size
);
547 neon_store_element(vd
, a
->reg_idx
, a
->size
, tmp
);
549 neon_load_element(tmp
, vd
, a
->reg_idx
, a
->size
);
550 gen_aa32_st_i32(s
, tmp
, addr
, get_mem_index(s
),
551 s
->be_data
| a
->size
);
554 tcg_gen_addi_i32(addr
, addr
, 1 << a
->size
);
556 tcg_temp_free_i32(addr
);
557 tcg_temp_free_i32(tmp
);
559 gen_neon_ldst_base_update(s
, a
->rm
, a
->rn
, (1 << a
->size
) * nregs
);
564 static bool do_3same(DisasContext
*s
, arg_3same
*a
, GVecGen3Fn fn
)
566 int vec_size
= a
->q
? 16 : 8;
567 int rd_ofs
= neon_reg_offset(a
->vd
, 0);
568 int rn_ofs
= neon_reg_offset(a
->vn
, 0);
569 int rm_ofs
= neon_reg_offset(a
->vm
, 0);
571 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
575 /* UNDEF accesses to D16-D31 if they don't exist. */
576 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
577 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
581 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
585 if (!vfp_access_check(s
)) {
589 fn(a
->size
, rd_ofs
, rn_ofs
, rm_ofs
, vec_size
, vec_size
);
593 #define DO_3SAME(INSN, FUNC) \
594 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
596 return do_3same(s, a, FUNC); \
599 DO_3SAME(VADD
, tcg_gen_gvec_add
)
600 DO_3SAME(VSUB
, tcg_gen_gvec_sub
)
601 DO_3SAME(VAND
, tcg_gen_gvec_and
)
602 DO_3SAME(VBIC
, tcg_gen_gvec_andc
)
603 DO_3SAME(VORR
, tcg_gen_gvec_or
)
604 DO_3SAME(VORN
, tcg_gen_gvec_orc
)
605 DO_3SAME(VEOR
, tcg_gen_gvec_xor
)
607 /* These insns are all gvec_bitsel but with the inputs in various orders. */
608 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
609 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
610 uint32_t rn_ofs, uint32_t rm_ofs, \
611 uint32_t oprsz, uint32_t maxsz) \
613 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
615 DO_3SAME(INSN, gen_##INSN##_3s)
617 DO_3SAME_BITSEL(VBSL
, rd_ofs
, rn_ofs
, rm_ofs
)
618 DO_3SAME_BITSEL(VBIT
, rm_ofs
, rn_ofs
, rd_ofs
)
619 DO_3SAME_BITSEL(VBIF
, rm_ofs
, rd_ofs
, rn_ofs
)
621 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \
622 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
624 if (a->size == 3) { \
627 return do_3same(s, a, FUNC); \
630 DO_3SAME_NO_SZ_3(VMAX_S
, tcg_gen_gvec_smax
)
631 DO_3SAME_NO_SZ_3(VMAX_U
, tcg_gen_gvec_umax
)
632 DO_3SAME_NO_SZ_3(VMIN_S
, tcg_gen_gvec_smin
)
633 DO_3SAME_NO_SZ_3(VMIN_U
, tcg_gen_gvec_umin
)
635 #define DO_3SAME_CMP(INSN, COND) \
636 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
637 uint32_t rn_ofs, uint32_t rm_ofs, \
638 uint32_t oprsz, uint32_t maxsz) \
640 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
642 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
644 DO_3SAME_CMP(VCGT_S
, TCG_COND_GT
)
645 DO_3SAME_CMP(VCGT_U
, TCG_COND_GTU
)
646 DO_3SAME_CMP(VCGE_S
, TCG_COND_GE
)
647 DO_3SAME_CMP(VCGE_U
, TCG_COND_GEU
)
648 DO_3SAME_CMP(VCEQ
, TCG_COND_EQ
)
650 static void gen_VTST_3s(unsigned vece
, uint32_t rd_ofs
, uint32_t rn_ofs
,
651 uint32_t rm_ofs
, uint32_t oprsz
, uint32_t maxsz
)
653 tcg_gen_gvec_3(rd_ofs
, rn_ofs
, rm_ofs
, oprsz
, maxsz
, &cmtst_op
[vece
]);
655 DO_3SAME_NO_SZ_3(VTST
, gen_VTST_3s
)