/*
 *  ARM translation: AArch32 Neon instructions
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *  Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */
29 static inline int plus1(DisasContext
*s
, int x
)
34 /* Include the generated Neon decoder */
35 #include "decode-neon-dp.inc.c"
36 #include "decode-neon-ls.inc.c"
37 #include "decode-neon-shared.inc.c"
39 static bool trans_VCMLA(DisasContext
*s
, arg_VCMLA
*a
)
43 gen_helper_gvec_3_ptr
*fn_gvec_ptr
;
45 if (!dc_isar_feature(aa32_vcma
, s
)
46 || (!a
->size
&& !dc_isar_feature(aa32_fp16_arith
, s
))) {
50 /* UNDEF accesses to D16-D31 if they don't exist. */
51 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
52 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
56 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
60 if (!vfp_access_check(s
)) {
64 opr_sz
= (1 + a
->q
) * 8;
65 fpst
= get_fpstatus_ptr(1);
66 fn_gvec_ptr
= a
->size
? gen_helper_gvec_fcmlas
: gen_helper_gvec_fcmlah
;
67 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
68 vfp_reg_offset(1, a
->vn
),
69 vfp_reg_offset(1, a
->vm
),
70 fpst
, opr_sz
, opr_sz
, a
->rot
,
72 tcg_temp_free_ptr(fpst
);
76 static bool trans_VCADD(DisasContext
*s
, arg_VCADD
*a
)
80 gen_helper_gvec_3_ptr
*fn_gvec_ptr
;
82 if (!dc_isar_feature(aa32_vcma
, s
)
83 || (!a
->size
&& !dc_isar_feature(aa32_fp16_arith
, s
))) {
87 /* UNDEF accesses to D16-D31 if they don't exist. */
88 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
89 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
93 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
97 if (!vfp_access_check(s
)) {
101 opr_sz
= (1 + a
->q
) * 8;
102 fpst
= get_fpstatus_ptr(1);
103 fn_gvec_ptr
= a
->size
? gen_helper_gvec_fcadds
: gen_helper_gvec_fcaddh
;
104 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
105 vfp_reg_offset(1, a
->vn
),
106 vfp_reg_offset(1, a
->vm
),
107 fpst
, opr_sz
, opr_sz
, a
->rot
,
109 tcg_temp_free_ptr(fpst
);
113 static bool trans_VDOT(DisasContext
*s
, arg_VDOT
*a
)
116 gen_helper_gvec_3
*fn_gvec
;
118 if (!dc_isar_feature(aa32_dp
, s
)) {
122 /* UNDEF accesses to D16-D31 if they don't exist. */
123 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
124 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
128 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
132 if (!vfp_access_check(s
)) {
136 opr_sz
= (1 + a
->q
) * 8;
137 fn_gvec
= a
->u
? gen_helper_gvec_udot_b
: gen_helper_gvec_sdot_b
;
138 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a
->vd
),
139 vfp_reg_offset(1, a
->vn
),
140 vfp_reg_offset(1, a
->vm
),
141 opr_sz
, opr_sz
, 0, fn_gvec
);
145 static bool trans_VFML(DisasContext
*s
, arg_VFML
*a
)
149 if (!dc_isar_feature(aa32_fhm
, s
)) {
153 /* UNDEF accesses to D16-D31 if they don't exist. */
154 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
163 if (!vfp_access_check(s
)) {
167 opr_sz
= (1 + a
->q
) * 8;
168 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
169 vfp_reg_offset(a
->q
, a
->vn
),
170 vfp_reg_offset(a
->q
, a
->vm
),
171 cpu_env
, opr_sz
, opr_sz
, a
->s
, /* is_2 == 0 */
172 gen_helper_gvec_fmlal_a32
);
176 static bool trans_VCMLA_scalar(DisasContext
*s
, arg_VCMLA_scalar
*a
)
178 gen_helper_gvec_3_ptr
*fn_gvec_ptr
;
182 if (!dc_isar_feature(aa32_vcma
, s
)) {
185 if (a
->size
== 0 && !dc_isar_feature(aa32_fp16_arith
, s
)) {
189 /* UNDEF accesses to D16-D31 if they don't exist. */
190 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
191 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
195 if ((a
->vd
| a
->vn
) & a
->q
) {
199 if (!vfp_access_check(s
)) {
203 fn_gvec_ptr
= (a
->size
? gen_helper_gvec_fcmlas_idx
204 : gen_helper_gvec_fcmlah_idx
);
205 opr_sz
= (1 + a
->q
) * 8;
206 fpst
= get_fpstatus_ptr(1);
207 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
208 vfp_reg_offset(1, a
->vn
),
209 vfp_reg_offset(1, a
->vm
),
210 fpst
, opr_sz
, opr_sz
,
211 (a
->index
<< 2) | a
->rot
, fn_gvec_ptr
);
212 tcg_temp_free_ptr(fpst
);
216 static bool trans_VDOT_scalar(DisasContext
*s
, arg_VDOT_scalar
*a
)
218 gen_helper_gvec_3
*fn_gvec
;
222 if (!dc_isar_feature(aa32_dp
, s
)) {
226 /* UNDEF accesses to D16-D31 if they don't exist. */
227 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
228 ((a
->vd
| a
->vn
) & 0x10)) {
232 if ((a
->vd
| a
->vn
) & a
->q
) {
236 if (!vfp_access_check(s
)) {
240 fn_gvec
= a
->u
? gen_helper_gvec_udot_idx_b
: gen_helper_gvec_sdot_idx_b
;
241 opr_sz
= (1 + a
->q
) * 8;
242 fpst
= get_fpstatus_ptr(1);
243 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a
->vd
),
244 vfp_reg_offset(1, a
->vn
),
245 vfp_reg_offset(1, a
->rm
),
246 opr_sz
, opr_sz
, a
->index
, fn_gvec
);
247 tcg_temp_free_ptr(fpst
);
251 static bool trans_VFML_scalar(DisasContext
*s
, arg_VFML_scalar
*a
)
255 if (!dc_isar_feature(aa32_fhm
, s
)) {
259 /* UNDEF accesses to D16-D31 if they don't exist. */
260 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
261 ((a
->vd
& 0x10) || (a
->q
&& (a
->vn
& 0x10)))) {
269 if (!vfp_access_check(s
)) {
273 opr_sz
= (1 + a
->q
) * 8;
274 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a
->vd
),
275 vfp_reg_offset(a
->q
, a
->vn
),
276 vfp_reg_offset(a
->q
, a
->rm
),
277 cpu_env
, opr_sz
, opr_sz
,
278 (a
->index
<< 2) | a
->s
, /* is_2 == 0 */
279 gen_helper_gvec_fmlal_idx_a32
);
/*
 * Table describing the Neon load/store multiple-structures "itype"
 * encodings: register count, interleave factor and register spacing
 * for each of the 11 legal itype values.
 * NOTE(review): table entries reconstructed from the standard
 * AArch32 VLDn/VSTn encodings — verify against upstream.
 */
static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1},
};
301 static void gen_neon_ldst_base_update(DisasContext
*s
, int rm
, int rn
,
307 base
= load_reg(s
, rn
);
309 tcg_gen_addi_i32(base
, base
, stride
);
312 index
= load_reg(s
, rm
);
313 tcg_gen_add_i32(base
, base
, index
);
314 tcg_temp_free_i32(index
);
316 store_reg(s
, rn
, base
);
320 static bool trans_VLDST_multiple(DisasContext
*s
, arg_VLDST_multiple
*a
)
322 /* Neon load/store multiple structures */
323 int nregs
, interleave
, spacing
, reg
, n
;
324 MemOp endian
= s
->be_data
;
325 int mmu_idx
= get_mem_index(s
);
330 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
334 /* UNDEF accesses to D16-D31 if they don't exist */
335 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
& 0x10)) {
341 /* Catch UNDEF cases for bad values of align field */
342 switch (a
->itype
& 0xc) {
356 nregs
= neon_ls_element_type
[a
->itype
].nregs
;
357 interleave
= neon_ls_element_type
[a
->itype
].interleave
;
358 spacing
= neon_ls_element_type
[a
->itype
].spacing
;
359 if (size
== 3 && (interleave
| spacing
) != 1) {
363 if (!vfp_access_check(s
)) {
367 /* For our purposes, bytes are always little-endian. */
372 * Consecutive little-endian elements from a single register
373 * can be promoted to a larger little-endian operation.
375 if (interleave
== 1 && endian
== MO_LE
) {
378 tmp64
= tcg_temp_new_i64();
379 addr
= tcg_temp_new_i32();
380 tmp
= tcg_const_i32(1 << size
);
381 load_reg_var(s
, addr
, a
->rn
);
382 for (reg
= 0; reg
< nregs
; reg
++) {
383 for (n
= 0; n
< 8 >> size
; n
++) {
385 for (xs
= 0; xs
< interleave
; xs
++) {
386 int tt
= a
->vd
+ reg
+ spacing
* xs
;
389 gen_aa32_ld_i64(s
, tmp64
, addr
, mmu_idx
, endian
| size
);
390 neon_store_element64(tt
, n
, size
, tmp64
);
392 neon_load_element64(tmp64
, tt
, n
, size
);
393 gen_aa32_st_i64(s
, tmp64
, addr
, mmu_idx
, endian
| size
);
395 tcg_gen_add_i32(addr
, addr
, tmp
);
399 tcg_temp_free_i32(addr
);
400 tcg_temp_free_i32(tmp
);
401 tcg_temp_free_i64(tmp64
);
403 gen_neon_ldst_base_update(s
, a
->rm
, a
->rn
, nregs
* interleave
* 8);
407 static bool trans_VLD_all_lanes(DisasContext
*s
, arg_VLD_all_lanes
*a
)
409 /* Neon load single structure to all lanes */
410 int reg
, stride
, vec_size
;
413 int nregs
= a
->n
+ 1;
416 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
420 /* UNDEF accesses to D16-D31 if they don't exist */
421 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
& 0x10)) {
426 if (nregs
!= 4 || a
->a
== 0) {
429 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
432 if (nregs
== 1 && a
->a
== 1 && size
== 0) {
435 if (nregs
== 3 && a
->a
== 1) {
439 if (!vfp_access_check(s
)) {
444 * VLD1 to all lanes: T bit indicates how many Dregs to write.
445 * VLD2/3/4 to all lanes: T bit indicates register stride.
447 stride
= a
->t
? 2 : 1;
448 vec_size
= nregs
== 1 ? stride
* 8 : 8;
450 tmp
= tcg_temp_new_i32();
451 addr
= tcg_temp_new_i32();
452 load_reg_var(s
, addr
, a
->rn
);
453 for (reg
= 0; reg
< nregs
; reg
++) {
454 gen_aa32_ld_i32(s
, tmp
, addr
, get_mem_index(s
),
456 if ((vd
& 1) && vec_size
== 16) {
458 * We cannot write 16 bytes at once because the
459 * destination is unaligned.
461 tcg_gen_gvec_dup_i32(size
, neon_reg_offset(vd
, 0),
463 tcg_gen_gvec_mov(0, neon_reg_offset(vd
+ 1, 0),
464 neon_reg_offset(vd
, 0), 8, 8);
466 tcg_gen_gvec_dup_i32(size
, neon_reg_offset(vd
, 0),
467 vec_size
, vec_size
, tmp
);
469 tcg_gen_addi_i32(addr
, addr
, 1 << size
);
472 tcg_temp_free_i32(tmp
);
473 tcg_temp_free_i32(addr
);
475 gen_neon_ldst_base_update(s
, a
->rm
, a
->rn
, (1 << size
) * nregs
);
480 static bool trans_VLDST_single(DisasContext
*s
, arg_VLDST_single
*a
)
482 /* Neon load/store single structure to one lane */
484 int nregs
= a
->n
+ 1;
488 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
492 /* UNDEF accesses to D16-D31 if they don't exist */
493 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
& 0x10)) {
497 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
500 if (((a
->align
& (1 << a
->size
)) != 0) ||
501 (a
->size
== 2 && ((a
->align
& 3) == 1 || (a
->align
& 3) == 2))) {
506 if ((a
->align
& 1) != 0) {
511 if (a
->size
== 2 && (a
->align
& 2) != 0) {
516 if ((a
->size
== 2) && ((a
->align
& 3) == 3)) {
523 if ((vd
+ a
->stride
* (nregs
- 1)) > 31) {
525 * Attempts to write off the end of the register file are
526 * UNPREDICTABLE; we choose to UNDEF because otherwise we would
527 * access off the end of the array that holds the register data.
532 if (!vfp_access_check(s
)) {
536 tmp
= tcg_temp_new_i32();
537 addr
= tcg_temp_new_i32();
538 load_reg_var(s
, addr
, a
->rn
);
540 * TODO: if we implemented alignment exceptions, we should check
541 * addr against the alignment encoded in a->align here.
543 for (reg
= 0; reg
< nregs
; reg
++) {
545 gen_aa32_ld_i32(s
, tmp
, addr
, get_mem_index(s
),
546 s
->be_data
| a
->size
);
547 neon_store_element(vd
, a
->reg_idx
, a
->size
, tmp
);
549 neon_load_element(tmp
, vd
, a
->reg_idx
, a
->size
);
550 gen_aa32_st_i32(s
, tmp
, addr
, get_mem_index(s
),
551 s
->be_data
| a
->size
);
554 tcg_gen_addi_i32(addr
, addr
, 1 << a
->size
);
556 tcg_temp_free_i32(addr
);
557 tcg_temp_free_i32(tmp
);
559 gen_neon_ldst_base_update(s
, a
->rm
, a
->rn
, (1 << a
->size
) * nregs
);
564 static bool do_3same(DisasContext
*s
, arg_3same
*a
, GVecGen3Fn fn
)
566 int vec_size
= a
->q
? 16 : 8;
567 int rd_ofs
= neon_reg_offset(a
->vd
, 0);
568 int rn_ofs
= neon_reg_offset(a
->vn
, 0);
569 int rm_ofs
= neon_reg_offset(a
->vm
, 0);
571 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
575 /* UNDEF accesses to D16-D31 if they don't exist. */
576 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
577 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
581 if ((a
->vn
| a
->vm
| a
->vd
) & a
->q
) {
585 if (!vfp_access_check(s
)) {
589 fn(a
->size
, rd_ofs
, rn_ofs
, rm_ofs
, vec_size
, vec_size
);
/* Define a trans function for a 3-same insn that simply calls do_3same */
#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }
599 DO_3SAME(VADD
, tcg_gen_gvec_add
)
600 DO_3SAME(VSUB
, tcg_gen_gvec_sub
)
601 DO_3SAME(VAND
, tcg_gen_gvec_and
)
602 DO_3SAME(VBIC
, tcg_gen_gvec_andc
)
603 DO_3SAME(VORR
, tcg_gen_gvec_or
)
604 DO_3SAME(VORN
, tcg_gen_gvec_orc
)
605 DO_3SAME(VEOR
, tcg_gen_gvec_xor
)
606 DO_3SAME(VSHL_S
, gen_gvec_sshl
)
607 DO_3SAME(VSHL_U
, gen_gvec_ushl
)
608 DO_3SAME(VQADD_S
, gen_gvec_sqadd_qc
)
609 DO_3SAME(VQADD_U
, gen_gvec_uqadd_qc
)
610 DO_3SAME(VQSUB_S
, gen_gvec_sqsub_qc
)
611 DO_3SAME(VQSUB_U
, gen_gvec_uqsub_qc
)
613 /* These insns are all gvec_bitsel but with the inputs in various orders. */
614 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
615 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
616 uint32_t rn_ofs, uint32_t rm_ofs, \
617 uint32_t oprsz, uint32_t maxsz) \
619 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
621 DO_3SAME(INSN, gen_##INSN##_3s)
623 DO_3SAME_BITSEL(VBSL
, rd_ofs
, rn_ofs
, rm_ofs
)
624 DO_3SAME_BITSEL(VBIT
, rm_ofs
, rn_ofs
, rd_ofs
)
625 DO_3SAME_BITSEL(VBIF
, rm_ofs
, rd_ofs
, rn_ofs
)
627 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \
628 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
630 if (a->size == 3) { \
633 return do_3same(s, a, FUNC); \
636 DO_3SAME_NO_SZ_3(VMAX_S
, tcg_gen_gvec_smax
)
637 DO_3SAME_NO_SZ_3(VMAX_U
, tcg_gen_gvec_umax
)
638 DO_3SAME_NO_SZ_3(VMIN_S
, tcg_gen_gvec_smin
)
639 DO_3SAME_NO_SZ_3(VMIN_U
, tcg_gen_gvec_umin
)
640 DO_3SAME_NO_SZ_3(VMUL
, tcg_gen_gvec_mul
)
641 DO_3SAME_NO_SZ_3(VMLA
, gen_gvec_mla
)
642 DO_3SAME_NO_SZ_3(VMLS
, gen_gvec_mls
)
643 DO_3SAME_NO_SZ_3(VTST
, gen_gvec_cmtst
)
644 DO_3SAME_NO_SZ_3(VABD_S
, gen_gvec_sabd
)
645 DO_3SAME_NO_SZ_3(VABA_S
, gen_gvec_saba
)
646 DO_3SAME_NO_SZ_3(VABD_U
, gen_gvec_uabd
)
647 DO_3SAME_NO_SZ_3(VABA_U
, gen_gvec_uaba
)
649 #define DO_3SAME_CMP(INSN, COND) \
650 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
651 uint32_t rn_ofs, uint32_t rm_ofs, \
652 uint32_t oprsz, uint32_t maxsz) \
654 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
656 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
658 DO_3SAME_CMP(VCGT_S
, TCG_COND_GT
)
659 DO_3SAME_CMP(VCGT_U
, TCG_COND_GTU
)
660 DO_3SAME_CMP(VCGE_S
, TCG_COND_GE
)
661 DO_3SAME_CMP(VCGE_U
, TCG_COND_GEU
)
662 DO_3SAME_CMP(VCEQ
, TCG_COND_EQ
)
664 static void gen_VMUL_p_3s(unsigned vece
, uint32_t rd_ofs
, uint32_t rn_ofs
,
665 uint32_t rm_ofs
, uint32_t oprsz
, uint32_t maxsz
)
667 tcg_gen_gvec_3_ool(rd_ofs
, rn_ofs
, rm_ofs
, oprsz
, maxsz
,
668 0, gen_helper_gvec_pmul_b
);
671 static bool trans_VMUL_p_3s(DisasContext
*s
, arg_3same
*a
)
676 return do_3same(s
, a
, gen_VMUL_p_3s
);
679 #define DO_VQRDMLAH(INSN, FUNC) \
680 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
682 if (!dc_isar_feature(aa32_rdm, s)) { \
685 if (a->size != 1 && a->size != 2) { \
688 return do_3same(s, a, FUNC); \
691 DO_VQRDMLAH(VQRDMLAH
, gen_gvec_sqrdmlah_qc
)
692 DO_VQRDMLAH(VQRDMLSH
, gen_gvec_sqrdmlsh_qc
)
694 static bool trans_SHA1_3s(DisasContext
*s
, arg_SHA1_3s
*a
)
696 TCGv_ptr ptr1
, ptr2
, ptr3
;
699 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
) ||
700 !dc_isar_feature(aa32_sha1
, s
)) {
704 /* UNDEF accesses to D16-D31 if they don't exist. */
705 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
706 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
710 if ((a
->vn
| a
->vm
| a
->vd
) & 1) {
714 if (!vfp_access_check(s
)) {
718 ptr1
= vfp_reg_ptr(true, a
->vd
);
719 ptr2
= vfp_reg_ptr(true, a
->vn
);
720 ptr3
= vfp_reg_ptr(true, a
->vm
);
721 tmp
= tcg_const_i32(a
->optype
);
722 gen_helper_crypto_sha1_3reg(ptr1
, ptr2
, ptr3
, tmp
);
723 tcg_temp_free_i32(tmp
);
724 tcg_temp_free_ptr(ptr1
);
725 tcg_temp_free_ptr(ptr2
);
726 tcg_temp_free_ptr(ptr3
);
731 static bool trans_SHA256H_3s(DisasContext
*s
, arg_SHA256H_3s
*a
)
733 TCGv_ptr ptr1
, ptr2
, ptr3
;
735 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
) ||
736 !dc_isar_feature(aa32_sha2
, s
)) {
740 /* UNDEF accesses to D16-D31 if they don't exist. */
741 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
742 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
746 if ((a
->vn
| a
->vm
| a
->vd
) & 1) {
750 if (!vfp_access_check(s
)) {
754 ptr1
= vfp_reg_ptr(true, a
->vd
);
755 ptr2
= vfp_reg_ptr(true, a
->vn
);
756 ptr3
= vfp_reg_ptr(true, a
->vm
);
757 gen_helper_crypto_sha256h(ptr1
, ptr2
, ptr3
);
758 tcg_temp_free_ptr(ptr1
);
759 tcg_temp_free_ptr(ptr2
);
760 tcg_temp_free_ptr(ptr3
);
765 static bool trans_SHA256H2_3s(DisasContext
*s
, arg_SHA256H2_3s
*a
)
767 TCGv_ptr ptr1
, ptr2
, ptr3
;
769 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
) ||
770 !dc_isar_feature(aa32_sha2
, s
)) {
774 /* UNDEF accesses to D16-D31 if they don't exist. */
775 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
776 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
780 if ((a
->vn
| a
->vm
| a
->vd
) & 1) {
784 if (!vfp_access_check(s
)) {
788 ptr1
= vfp_reg_ptr(true, a
->vd
);
789 ptr2
= vfp_reg_ptr(true, a
->vn
);
790 ptr3
= vfp_reg_ptr(true, a
->vm
);
791 gen_helper_crypto_sha256h2(ptr1
, ptr2
, ptr3
);
792 tcg_temp_free_ptr(ptr1
);
793 tcg_temp_free_ptr(ptr2
);
794 tcg_temp_free_ptr(ptr3
);
799 static bool trans_SHA256SU1_3s(DisasContext
*s
, arg_SHA256SU1_3s
*a
)
801 TCGv_ptr ptr1
, ptr2
, ptr3
;
803 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
) ||
804 !dc_isar_feature(aa32_sha2
, s
)) {
808 /* UNDEF accesses to D16-D31 if they don't exist. */
809 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
810 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
814 if ((a
->vn
| a
->vm
| a
->vd
) & 1) {
818 if (!vfp_access_check(s
)) {
822 ptr1
= vfp_reg_ptr(true, a
->vd
);
823 ptr2
= vfp_reg_ptr(true, a
->vn
);
824 ptr3
= vfp_reg_ptr(true, a
->vm
);
825 gen_helper_crypto_sha256su1(ptr1
, ptr2
, ptr3
);
826 tcg_temp_free_ptr(ptr1
);
827 tcg_temp_free_ptr(ptr2
);
828 tcg_temp_free_ptr(ptr3
);
833 #define DO_3SAME_64(INSN, FUNC) \
834 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
835 uint32_t rn_ofs, uint32_t rm_ofs, \
836 uint32_t oprsz, uint32_t maxsz) \
838 static const GVecGen3 op = { .fni8 = FUNC }; \
839 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
841 DO_3SAME(INSN, gen_##INSN##_3s)
843 #define DO_3SAME_64_ENV(INSN, FUNC) \
844 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
846 FUNC(d, cpu_env, n, m); \
848 DO_3SAME_64(INSN, gen_##INSN##_elt)
850 DO_3SAME_64(VRSHL_S64
, gen_helper_neon_rshl_s64
)
851 DO_3SAME_64(VRSHL_U64
, gen_helper_neon_rshl_u64
)
852 DO_3SAME_64_ENV(VQSHL_S64
, gen_helper_neon_qshl_s64
)
853 DO_3SAME_64_ENV(VQSHL_U64
, gen_helper_neon_qshl_u64
)
854 DO_3SAME_64_ENV(VQRSHL_S64
, gen_helper_neon_qrshl_s64
)
855 DO_3SAME_64_ENV(VQRSHL_U64
, gen_helper_neon_qrshl_u64
)
/*
 * 3-same ops with per-element-size i32 helpers (sizes 0..2); size 3
 * UNDEFs in the trans function.
 */
#define DO_3SAME_32(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }
/*
 * Some helper functions need to be passed the cpu_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }

#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_##INSN##_tramp8 },                            \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }
914 DO_3SAME_32(VHADD_S
, hadd_s
)
915 DO_3SAME_32(VHADD_U
, hadd_u
)
916 DO_3SAME_32(VHSUB_S
, hsub_s
)
917 DO_3SAME_32(VHSUB_U
, hsub_u
)
918 DO_3SAME_32(VRHADD_S
, rhadd_s
)
919 DO_3SAME_32(VRHADD_U
, rhadd_u
)
920 DO_3SAME_32(VRSHL_S
, rshl_s
)
921 DO_3SAME_32(VRSHL_U
, rshl_u
)
923 DO_3SAME_32_ENV(VQSHL_S
, qshl_s
)
924 DO_3SAME_32_ENV(VQSHL_U
, qshl_u
)
925 DO_3SAME_32_ENV(VQRSHL_S
, qrshl_s
)
926 DO_3SAME_32_ENV(VQRSHL_U
, qrshl_u
)