/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "tcg/tcg-gvec-desc.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64
, uint32_t, uint32_t);
41 typedef void gen_helper_gvec_flags_3(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
43 typedef void gen_helper_gvec_flags_4(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
44 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
46 typedef void gen_helper_gvec_mem(TCGv_env
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env
, TCGv_ptr
, TCGv_ptr
,
48 TCGv_ptr
, TCGv_i64
, TCGv_i32
);
51 * Helpers for extracting complex instruction fields.
54 /* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
57 static int tszimm_esz(DisasContext
*s
, int x
)
59 x
>>= 3; /* discard imm3 */
63 static int tszimm_shr(DisasContext
*s
, int x
)
65 return (16 << tszimm_esz(s
, x
)) - x
;
68 /* See e.g. LSL (immediate, predicated). */
69 static int tszimm_shl(DisasContext
*s
, int x
)
71 return x
- (8 << tszimm_esz(s
, x
));
74 static inline int plus1(DisasContext
*s
, int x
)
79 /* The SH bit is in bit 8. Extract the low 8 and shift. */
80 static inline int expand_imm_sh8s(DisasContext
*s
, int x
)
82 return (int8_t)x
<< (x
& 0x100 ? 8 : 0);
85 static inline int expand_imm_sh8u(DisasContext
*s
, int x
)
87 return (uint8_t)x
<< (x
& 0x100 ? 8 : 0);
90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
93 static inline int msz_dtype(DisasContext
*s
, int msz
)
95 static const uint8_t dtype
[4] = { 0, 5, 10, 15 };
100 * Include the generated decoder.
103 #include "decode-sve.c.inc"
106 * Implement all of the translator functions referenced by the decoder.
109 /* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
112 static inline int pred_full_reg_offset(DisasContext
*s
, int regno
)
114 return offsetof(CPUARMState
, vfp
.pregs
[regno
]);
117 /* Return the byte size of the whole predicate register, VL / 64. */
118 static inline int pred_full_reg_size(DisasContext
*s
)
120 return s
->sve_len
>> 3;
123 /* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them the same way.
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
131 static int size_for_gvec(int size
)
136 return QEMU_ALIGN_UP(size
, 16);
140 static int pred_gvec_reg_size(DisasContext
*s
)
142 return size_for_gvec(pred_full_reg_size(s
));
145 /* Invoke an out-of-line helper on 2 Zregs. */
146 static void gen_gvec_ool_zz(DisasContext
*s
, gen_helper_gvec_2
*fn
,
147 int rd
, int rn
, int data
)
149 unsigned vsz
= vec_full_reg_size(s
);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
151 vec_full_reg_offset(s
, rn
),
155 /* Invoke an out-of-line helper on 3 Zregs. */
156 static void gen_gvec_ool_zzz(DisasContext
*s
, gen_helper_gvec_3
*fn
,
157 int rd
, int rn
, int rm
, int data
)
159 unsigned vsz
= vec_full_reg_size(s
);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, rd
),
161 vec_full_reg_offset(s
, rn
),
162 vec_full_reg_offset(s
, rm
),
166 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
167 static void gen_gvec_ool_zzp(DisasContext
*s
, gen_helper_gvec_3
*fn
,
168 int rd
, int rn
, int pg
, int data
)
170 unsigned vsz
= vec_full_reg_size(s
);
171 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, rd
),
172 vec_full_reg_offset(s
, rn
),
173 pred_full_reg_offset(s
, pg
),
177 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
178 static void gen_gvec_ool_zzzp(DisasContext
*s
, gen_helper_gvec_4
*fn
,
179 int rd
, int rn
, int rm
, int pg
, int data
)
181 unsigned vsz
= vec_full_reg_size(s
);
182 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, rd
),
183 vec_full_reg_offset(s
, rn
),
184 vec_full_reg_offset(s
, rm
),
185 pred_full_reg_offset(s
, pg
),
189 /* Invoke a vector expander on two Zregs. */
190 static void gen_gvec_fn_zz(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
191 int esz
, int rd
, int rn
)
193 unsigned vsz
= vec_full_reg_size(s
);
194 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
195 vec_full_reg_offset(s
, rn
), vsz
, vsz
);
198 /* Invoke a vector expander on three Zregs. */
199 static void gen_gvec_fn_zzz(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
200 int esz
, int rd
, int rn
, int rm
)
202 unsigned vsz
= vec_full_reg_size(s
);
203 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
204 vec_full_reg_offset(s
, rn
),
205 vec_full_reg_offset(s
, rm
), vsz
, vsz
);
208 /* Invoke a vector move on two Zregs. */
209 static bool do_mov_z(DisasContext
*s
, int rd
, int rn
)
211 if (sve_access_check(s
)) {
212 gen_gvec_fn_zz(s
, tcg_gen_gvec_mov
, MO_8
, rd
, rn
);
217 /* Initialize a Zreg with replications of a 64-bit immediate. */
218 static void do_dupi_z(DisasContext
*s
, int rd
, uint64_t word
)
220 unsigned vsz
= vec_full_reg_size(s
);
221 tcg_gen_gvec_dup_imm(MO_64
, vec_full_reg_offset(s
, rd
), vsz
, vsz
, word
);
224 /* Invoke a vector expander on three Pregs. */
225 static void gen_gvec_fn_ppp(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
226 int rd
, int rn
, int rm
)
228 unsigned psz
= pred_gvec_reg_size(s
);
229 gvec_fn(MO_64
, pred_full_reg_offset(s
, rd
),
230 pred_full_reg_offset(s
, rn
),
231 pred_full_reg_offset(s
, rm
), psz
, psz
);
234 /* Invoke a vector move on two Pregs. */
235 static bool do_mov_p(DisasContext
*s
, int rd
, int rn
)
237 if (sve_access_check(s
)) {
238 unsigned psz
= pred_gvec_reg_size(s
);
239 tcg_gen_gvec_mov(MO_8
, pred_full_reg_offset(s
, rd
),
240 pred_full_reg_offset(s
, rn
), psz
, psz
);
245 /* Set the cpu flags as per a return from an SVE helper. */
246 static void do_pred_flags(TCGv_i32 t
)
248 tcg_gen_mov_i32(cpu_NF
, t
);
249 tcg_gen_andi_i32(cpu_ZF
, t
, 2);
250 tcg_gen_andi_i32(cpu_CF
, t
, 1);
251 tcg_gen_movi_i32(cpu_VF
, 0);
254 /* Subroutines computing the ARM PredTest psuedofunction. */
255 static void do_predtest1(TCGv_i64 d
, TCGv_i64 g
)
257 TCGv_i32 t
= tcg_temp_new_i32();
259 gen_helper_sve_predtest1(t
, d
, g
);
261 tcg_temp_free_i32(t
);
264 static void do_predtest(DisasContext
*s
, int dofs
, int gofs
, int words
)
266 TCGv_ptr dptr
= tcg_temp_new_ptr();
267 TCGv_ptr gptr
= tcg_temp_new_ptr();
270 tcg_gen_addi_ptr(dptr
, cpu_env
, dofs
);
271 tcg_gen_addi_ptr(gptr
, cpu_env
, gofs
);
272 t
= tcg_const_i32(words
);
274 gen_helper_sve_predtest(t
, dptr
, gptr
, t
);
275 tcg_temp_free_ptr(dptr
);
276 tcg_temp_free_ptr(gptr
);
279 tcg_temp_free_i32(t
);
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
289 *** SVE Logical - Unpredicated Group
292 static bool do_zzz_fn(DisasContext
*s
, arg_rrr_esz
*a
, GVecGen3Fn
*gvec_fn
)
294 if (sve_access_check(s
)) {
295 gen_gvec_fn_zzz(s
, gvec_fn
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
300 static bool trans_AND_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
302 return do_zzz_fn(s
, a
, tcg_gen_gvec_and
);
305 static bool trans_ORR_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
307 return do_zzz_fn(s
, a
, tcg_gen_gvec_or
);
310 static bool trans_EOR_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
312 return do_zzz_fn(s
, a
, tcg_gen_gvec_xor
);
315 static bool trans_BIC_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
317 return do_zzz_fn(s
, a
, tcg_gen_gvec_andc
);
321 *** SVE Integer Arithmetic - Unpredicated Group
324 static bool trans_ADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
326 return do_zzz_fn(s
, a
, tcg_gen_gvec_add
);
329 static bool trans_SUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
331 return do_zzz_fn(s
, a
, tcg_gen_gvec_sub
);
334 static bool trans_SQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
336 return do_zzz_fn(s
, a
, tcg_gen_gvec_ssadd
);
339 static bool trans_SQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
341 return do_zzz_fn(s
, a
, tcg_gen_gvec_sssub
);
344 static bool trans_UQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
346 return do_zzz_fn(s
, a
, tcg_gen_gvec_usadd
);
349 static bool trans_UQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
351 return do_zzz_fn(s
, a
, tcg_gen_gvec_ussub
);
355 *** SVE Integer Arithmetic - Binary Predicated Group
358 static bool do_zpzz_ool(DisasContext
*s
, arg_rprr_esz
*a
, gen_helper_gvec_4
*fn
)
363 if (sve_access_check(s
)) {
364 gen_gvec_ool_zzzp(s
, fn
, a
->rd
, a
->rn
, a
->rm
, a
->pg
, 0);
369 /* Select active elememnts from Zn and inactive elements from Zm,
370 * storing the result in Zd.
372 static void do_sel_z(DisasContext
*s
, int rd
, int rn
, int rm
, int pg
, int esz
)
374 static gen_helper_gvec_4
* const fns
[4] = {
375 gen_helper_sve_sel_zpzz_b
, gen_helper_sve_sel_zpzz_h
,
376 gen_helper_sve_sel_zpzz_s
, gen_helper_sve_sel_zpzz_d
378 gen_gvec_ool_zzzp(s
, fns
[esz
], rd
, rn
, rm
, pg
, 0);
381 #define DO_ZPZZ(NAME, name) \
382 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
384 static gen_helper_gvec_4 * const fns[4] = { \
385 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
386 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
388 return do_zpzz_ool(s, a, fns[a->esz]); \
407 DO_ZPZZ(SMULH
, smulh
)
408 DO_ZPZZ(UMULH
, umulh
)
414 static bool trans_SDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
)
416 static gen_helper_gvec_4
* const fns
[4] = {
417 NULL
, NULL
, gen_helper_sve_sdiv_zpzz_s
, gen_helper_sve_sdiv_zpzz_d
419 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
422 static bool trans_UDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
)
424 static gen_helper_gvec_4
* const fns
[4] = {
425 NULL
, NULL
, gen_helper_sve_udiv_zpzz_s
, gen_helper_sve_udiv_zpzz_d
427 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
430 static bool trans_SEL_zpzz(DisasContext
*s
, arg_rprr_esz
*a
)
432 if (sve_access_check(s
)) {
433 do_sel_z(s
, a
->rd
, a
->rn
, a
->rm
, a
->pg
, a
->esz
);
441 *** SVE Integer Arithmetic - Unary Predicated Group
444 static bool do_zpz_ool(DisasContext
*s
, arg_rpr_esz
*a
, gen_helper_gvec_3
*fn
)
449 if (sve_access_check(s
)) {
450 gen_gvec_ool_zzp(s
, fn
, a
->rd
, a
->rn
, a
->pg
, 0);
455 #define DO_ZPZ(NAME, name) \
456 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
458 static gen_helper_gvec_3 * const fns[4] = { \
459 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
460 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
462 return do_zpz_ool(s, a, fns[a->esz]); \
467 DO_ZPZ(CNT_zpz
, cnt_zpz
)
469 DO_ZPZ(NOT_zpz
, not_zpz
)
473 static bool trans_FABS(DisasContext
*s
, arg_rpr_esz
*a
)
475 static gen_helper_gvec_3
* const fns
[4] = {
477 gen_helper_sve_fabs_h
,
478 gen_helper_sve_fabs_s
,
479 gen_helper_sve_fabs_d
481 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
484 static bool trans_FNEG(DisasContext
*s
, arg_rpr_esz
*a
)
486 static gen_helper_gvec_3
* const fns
[4] = {
488 gen_helper_sve_fneg_h
,
489 gen_helper_sve_fneg_s
,
490 gen_helper_sve_fneg_d
492 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
495 static bool trans_SXTB(DisasContext
*s
, arg_rpr_esz
*a
)
497 static gen_helper_gvec_3
* const fns
[4] = {
499 gen_helper_sve_sxtb_h
,
500 gen_helper_sve_sxtb_s
,
501 gen_helper_sve_sxtb_d
503 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
506 static bool trans_UXTB(DisasContext
*s
, arg_rpr_esz
*a
)
508 static gen_helper_gvec_3
* const fns
[4] = {
510 gen_helper_sve_uxtb_h
,
511 gen_helper_sve_uxtb_s
,
512 gen_helper_sve_uxtb_d
514 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
517 static bool trans_SXTH(DisasContext
*s
, arg_rpr_esz
*a
)
519 static gen_helper_gvec_3
* const fns
[4] = {
521 gen_helper_sve_sxth_s
,
522 gen_helper_sve_sxth_d
524 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
527 static bool trans_UXTH(DisasContext
*s
, arg_rpr_esz
*a
)
529 static gen_helper_gvec_3
* const fns
[4] = {
531 gen_helper_sve_uxth_s
,
532 gen_helper_sve_uxth_d
534 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
537 static bool trans_SXTW(DisasContext
*s
, arg_rpr_esz
*a
)
539 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_sxtw_d
: NULL
);
542 static bool trans_UXTW(DisasContext
*s
, arg_rpr_esz
*a
)
544 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_uxtw_d
: NULL
);
550 *** SVE Integer Reduction Group
553 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
554 static bool do_vpz_ool(DisasContext
*s
, arg_rpr_esz
*a
,
555 gen_helper_gvec_reduc
*fn
)
557 unsigned vsz
= vec_full_reg_size(s
);
565 if (!sve_access_check(s
)) {
569 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
570 temp
= tcg_temp_new_i64();
571 t_zn
= tcg_temp_new_ptr();
572 t_pg
= tcg_temp_new_ptr();
574 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
575 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
576 fn(temp
, t_zn
, t_pg
, desc
);
577 tcg_temp_free_ptr(t_zn
);
578 tcg_temp_free_ptr(t_pg
);
579 tcg_temp_free_i32(desc
);
581 write_fp_dreg(s
, a
->rd
, temp
);
582 tcg_temp_free_i64(temp
);
586 #define DO_VPZ(NAME, name) \
587 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
589 static gen_helper_gvec_reduc * const fns[4] = { \
590 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
591 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
593 return do_vpz_ool(s, a, fns[a->esz]); \
606 static bool trans_SADDV(DisasContext
*s
, arg_rpr_esz
*a
)
608 static gen_helper_gvec_reduc
* const fns
[4] = {
609 gen_helper_sve_saddv_b
, gen_helper_sve_saddv_h
,
610 gen_helper_sve_saddv_s
, NULL
612 return do_vpz_ool(s
, a
, fns
[a
->esz
]);
618 *** SVE Shift by Immediate - Predicated Group
622 * Copy Zn into Zd, storing zeros into inactive elements.
623 * If invert, store zeros into the active elements.
625 static bool do_movz_zpz(DisasContext
*s
, int rd
, int rn
, int pg
,
626 int esz
, bool invert
)
628 static gen_helper_gvec_3
* const fns
[4] = {
629 gen_helper_sve_movz_b
, gen_helper_sve_movz_h
,
630 gen_helper_sve_movz_s
, gen_helper_sve_movz_d
,
633 if (sve_access_check(s
)) {
634 gen_gvec_ool_zzp(s
, fns
[esz
], rd
, rn
, pg
, invert
);
639 static bool do_zpzi_ool(DisasContext
*s
, arg_rpri_esz
*a
,
640 gen_helper_gvec_3
*fn
)
642 if (sve_access_check(s
)) {
643 gen_gvec_ool_zzp(s
, fn
, a
->rd
, a
->rn
, a
->pg
, a
->imm
);
648 static bool trans_ASR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
650 static gen_helper_gvec_3
* const fns
[4] = {
651 gen_helper_sve_asr_zpzi_b
, gen_helper_sve_asr_zpzi_h
,
652 gen_helper_sve_asr_zpzi_s
, gen_helper_sve_asr_zpzi_d
,
655 /* Invalid tsz encoding -- see tszimm_esz. */
658 /* Shift by element size is architecturally valid. For
659 arithmetic right-shift, it's the same as by one less. */
660 a
->imm
= MIN(a
->imm
, (8 << a
->esz
) - 1);
661 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
664 static bool trans_LSR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
666 static gen_helper_gvec_3
* const fns
[4] = {
667 gen_helper_sve_lsr_zpzi_b
, gen_helper_sve_lsr_zpzi_h
,
668 gen_helper_sve_lsr_zpzi_s
, gen_helper_sve_lsr_zpzi_d
,
673 /* Shift by element size is architecturally valid.
674 For logical shifts, it is a zeroing operation. */
675 if (a
->imm
>= (8 << a
->esz
)) {
676 return do_movz_zpz(s
, a
->rd
, a
->rd
, a
->pg
, a
->esz
, true);
678 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
682 static bool trans_LSL_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
684 static gen_helper_gvec_3
* const fns
[4] = {
685 gen_helper_sve_lsl_zpzi_b
, gen_helper_sve_lsl_zpzi_h
,
686 gen_helper_sve_lsl_zpzi_s
, gen_helper_sve_lsl_zpzi_d
,
691 /* Shift by element size is architecturally valid.
692 For logical shifts, it is a zeroing operation. */
693 if (a
->imm
>= (8 << a
->esz
)) {
694 return do_movz_zpz(s
, a
->rd
, a
->rd
, a
->pg
, a
->esz
, true);
696 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
700 static bool trans_ASRD(DisasContext
*s
, arg_rpri_esz
*a
)
702 static gen_helper_gvec_3
* const fns
[4] = {
703 gen_helper_sve_asrd_b
, gen_helper_sve_asrd_h
,
704 gen_helper_sve_asrd_s
, gen_helper_sve_asrd_d
,
709 /* Shift by element size is architecturally valid. For arithmetic
710 right shift for division, it is a zeroing operation. */
711 if (a
->imm
>= (8 << a
->esz
)) {
712 return do_movz_zpz(s
, a
->rd
, a
->rd
, a
->pg
, a
->esz
, true);
714 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
719 *** SVE Bitwise Shift - Predicated Group
/* Predicated shift by wide (64-bit) elements; no 64-bit narrow form. */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

/* NOTE(review): the DO_ZPZW instantiations (ASR/LSR/LSL) appear to have
 * been lost from this copy -- verify against upstream. */
742 *** SVE Bitwise Shift - Unpredicated Group
745 static bool do_shift_imm(DisasContext
*s
, arg_rri_esz
*a
, bool asr
,
746 void (*gvec_fn
)(unsigned, uint32_t, uint32_t,
747 int64_t, uint32_t, uint32_t))
750 /* Invalid tsz encoding -- see tszimm_esz. */
753 if (sve_access_check(s
)) {
754 unsigned vsz
= vec_full_reg_size(s
);
755 /* Shift by element size is architecturally valid. For
756 arithmetic right-shift, it's the same as by one less.
757 Otherwise it is a zeroing operation. */
758 if (a
->imm
>= 8 << a
->esz
) {
760 a
->imm
= (8 << a
->esz
) - 1;
762 do_dupi_z(s
, a
->rd
, 0);
766 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
767 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
772 static bool trans_ASR_zzi(DisasContext
*s
, arg_rri_esz
*a
)
774 return do_shift_imm(s
, a
, true, tcg_gen_gvec_sari
);
777 static bool trans_LSR_zzi(DisasContext
*s
, arg_rri_esz
*a
)
779 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shri
);
782 static bool trans_LSL_zzi(DisasContext
*s
, arg_rri_esz
*a
)
784 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shli
);
787 static bool do_zzw_ool(DisasContext
*s
, arg_rrr_esz
*a
, gen_helper_gvec_3
*fn
)
792 if (sve_access_check(s
)) {
793 gen_gvec_ool_zzz(s
, fn
, a
->rd
, a
->rn
, a
->rm
, 0);
798 #define DO_ZZW(NAME, name) \
799 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
801 static gen_helper_gvec_3 * const fns[4] = { \
802 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
803 gen_helper_sve_##name##_zzw_s, NULL \
805 return do_zzw_ool(s, a, fns[a->esz]); \
815 *** SVE Integer Multiply-Add Group
818 static bool do_zpzzz_ool(DisasContext
*s
, arg_rprrr_esz
*a
,
819 gen_helper_gvec_5
*fn
)
821 if (sve_access_check(s
)) {
822 unsigned vsz
= vec_full_reg_size(s
);
823 tcg_gen_gvec_5_ool(vec_full_reg_offset(s
, a
->rd
),
824 vec_full_reg_offset(s
, a
->ra
),
825 vec_full_reg_offset(s
, a
->rn
),
826 vec_full_reg_offset(s
, a
->rm
),
827 pred_full_reg_offset(s
, a
->pg
),
833 #define DO_ZPZZZ(NAME, name) \
834 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
836 static gen_helper_gvec_5 * const fns[4] = { \
837 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
838 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
840 return do_zpzzz_ool(s, a, fns[a->esz]); \
849 *** SVE Index Generation Group
852 static void do_index(DisasContext
*s
, int esz
, int rd
,
853 TCGv_i64 start
, TCGv_i64 incr
)
855 unsigned vsz
= vec_full_reg_size(s
);
856 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
857 TCGv_ptr t_zd
= tcg_temp_new_ptr();
859 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
861 gen_helper_sve_index_d(t_zd
, start
, incr
, desc
);
863 typedef void index_fn(TCGv_ptr
, TCGv_i32
, TCGv_i32
, TCGv_i32
);
864 static index_fn
* const fns
[3] = {
865 gen_helper_sve_index_b
,
866 gen_helper_sve_index_h
,
867 gen_helper_sve_index_s
,
869 TCGv_i32 s32
= tcg_temp_new_i32();
870 TCGv_i32 i32
= tcg_temp_new_i32();
872 tcg_gen_extrl_i64_i32(s32
, start
);
873 tcg_gen_extrl_i64_i32(i32
, incr
);
874 fns
[esz
](t_zd
, s32
, i32
, desc
);
876 tcg_temp_free_i32(s32
);
877 tcg_temp_free_i32(i32
);
879 tcg_temp_free_ptr(t_zd
);
880 tcg_temp_free_i32(desc
);
883 static bool trans_INDEX_ii(DisasContext
*s
, arg_INDEX_ii
*a
)
885 if (sve_access_check(s
)) {
886 TCGv_i64 start
= tcg_const_i64(a
->imm1
);
887 TCGv_i64 incr
= tcg_const_i64(a
->imm2
);
888 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
889 tcg_temp_free_i64(start
);
890 tcg_temp_free_i64(incr
);
895 static bool trans_INDEX_ir(DisasContext
*s
, arg_INDEX_ir
*a
)
897 if (sve_access_check(s
)) {
898 TCGv_i64 start
= tcg_const_i64(a
->imm
);
899 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
900 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
901 tcg_temp_free_i64(start
);
906 static bool trans_INDEX_ri(DisasContext
*s
, arg_INDEX_ri
*a
)
908 if (sve_access_check(s
)) {
909 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
910 TCGv_i64 incr
= tcg_const_i64(a
->imm
);
911 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
912 tcg_temp_free_i64(incr
);
917 static bool trans_INDEX_rr(DisasContext
*s
, arg_INDEX_rr
*a
)
919 if (sve_access_check(s
)) {
920 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
921 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
922 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
928 *** SVE Stack Allocation Group
931 static bool trans_ADDVL(DisasContext
*s
, arg_ADDVL
*a
)
933 if (sve_access_check(s
)) {
934 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
935 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
936 tcg_gen_addi_i64(rd
, rn
, a
->imm
* vec_full_reg_size(s
));
941 static bool trans_ADDPL(DisasContext
*s
, arg_ADDPL
*a
)
943 if (sve_access_check(s
)) {
944 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
945 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
946 tcg_gen_addi_i64(rd
, rn
, a
->imm
* pred_full_reg_size(s
));
951 static bool trans_RDVL(DisasContext
*s
, arg_RDVL
*a
)
953 if (sve_access_check(s
)) {
954 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
955 tcg_gen_movi_i64(reg
, a
->imm
* vec_full_reg_size(s
));
961 *** SVE Compute Vector Address Group
964 static bool do_adr(DisasContext
*s
, arg_rrri
*a
, gen_helper_gvec_3
*fn
)
966 if (sve_access_check(s
)) {
967 gen_gvec_ool_zzz(s
, fn
, a
->rd
, a
->rn
, a
->rm
, a
->imm
);
972 static bool trans_ADR_p32(DisasContext
*s
, arg_rrri
*a
)
974 return do_adr(s
, a
, gen_helper_sve_adr_p32
);
977 static bool trans_ADR_p64(DisasContext
*s
, arg_rrri
*a
)
979 return do_adr(s
, a
, gen_helper_sve_adr_p64
);
982 static bool trans_ADR_s32(DisasContext
*s
, arg_rrri
*a
)
984 return do_adr(s
, a
, gen_helper_sve_adr_s32
);
987 static bool trans_ADR_u32(DisasContext
*s
, arg_rrri
*a
)
989 return do_adr(s
, a
, gen_helper_sve_adr_u32
);
993 *** SVE Integer Misc - Unpredicated Group
996 static bool trans_FEXPA(DisasContext
*s
, arg_rr_esz
*a
)
998 static gen_helper_gvec_2
* const fns
[4] = {
1000 gen_helper_sve_fexpa_h
,
1001 gen_helper_sve_fexpa_s
,
1002 gen_helper_sve_fexpa_d
,
1007 if (sve_access_check(s
)) {
1008 gen_gvec_ool_zz(s
, fns
[a
->esz
], a
->rd
, a
->rn
, 0);
1013 static bool trans_FTSSEL(DisasContext
*s
, arg_rrr_esz
*a
)
1015 static gen_helper_gvec_3
* const fns
[4] = {
1017 gen_helper_sve_ftssel_h
,
1018 gen_helper_sve_ftssel_s
,
1019 gen_helper_sve_ftssel_d
,
1024 if (sve_access_check(s
)) {
1025 gen_gvec_ool_zzz(s
, fns
[a
->esz
], a
->rd
, a
->rn
, a
->rm
, 0);
1031 *** SVE Predicate Logical Operations Group
1034 static bool do_pppp_flags(DisasContext
*s
, arg_rprr_s
*a
,
1035 const GVecGen4
*gvec_op
)
1037 if (!sve_access_check(s
)) {
1041 unsigned psz
= pred_gvec_reg_size(s
);
1042 int dofs
= pred_full_reg_offset(s
, a
->rd
);
1043 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1044 int mofs
= pred_full_reg_offset(s
, a
->rm
);
1045 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1048 tcg_gen_gvec_4(dofs
, nofs
, mofs
, gofs
, psz
, psz
, gvec_op
);
1053 /* Do the operation and the flags generation in temps. */
1054 TCGv_i64 pd
= tcg_temp_new_i64();
1055 TCGv_i64 pn
= tcg_temp_new_i64();
1056 TCGv_i64 pm
= tcg_temp_new_i64();
1057 TCGv_i64 pg
= tcg_temp_new_i64();
1059 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1060 tcg_gen_ld_i64(pm
, cpu_env
, mofs
);
1061 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1063 gvec_op
->fni8(pd
, pn
, pm
, pg
);
1064 tcg_gen_st_i64(pd
, cpu_env
, dofs
);
1066 do_predtest1(pd
, pg
);
1068 tcg_temp_free_i64(pd
);
1069 tcg_temp_free_i64(pn
);
1070 tcg_temp_free_i64(pm
);
1071 tcg_temp_free_i64(pg
);
1073 /* The operation and flags generation is large. The computation
1074 * of the flags depends on the original contents of the guarding
1075 * predicate. If the destination overwrites the guarding predicate,
1076 * then the easiest way to get this right is to save a copy.
1079 if (a
->rd
== a
->pg
) {
1080 tofs
= offsetof(CPUARMState
, vfp
.preg_tmp
);
1081 tcg_gen_gvec_mov(0, tofs
, gofs
, psz
, psz
);
1084 tcg_gen_gvec_4(dofs
, nofs
, mofs
, gofs
, psz
, psz
, gvec_op
);
1085 do_predtest(s
, dofs
, tofs
, psz
/ 8);
1090 static void gen_and_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1092 tcg_gen_and_i64(pd
, pn
, pm
);
1093 tcg_gen_and_i64(pd
, pd
, pg
);
1096 static void gen_and_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1097 TCGv_vec pm
, TCGv_vec pg
)
1099 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1100 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1103 static bool trans_AND_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1105 static const GVecGen4 op
= {
1106 .fni8
= gen_and_pg_i64
,
1107 .fniv
= gen_and_pg_vec
,
1108 .fno
= gen_helper_sve_and_pppp
,
1109 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1113 if (!sve_access_check(s
)) {
1116 if (a
->rn
== a
->rm
) {
1117 if (a
->pg
== a
->rn
) {
1118 do_mov_p(s
, a
->rd
, a
->rn
);
1120 gen_gvec_fn_ppp(s
, tcg_gen_gvec_and
, a
->rd
, a
->rn
, a
->pg
);
1123 } else if (a
->pg
== a
->rn
|| a
->pg
== a
->rm
) {
1124 gen_gvec_fn_ppp(s
, tcg_gen_gvec_and
, a
->rd
, a
->rn
, a
->rm
);
1128 return do_pppp_flags(s
, a
, &op
);
1131 static void gen_bic_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1133 tcg_gen_andc_i64(pd
, pn
, pm
);
1134 tcg_gen_and_i64(pd
, pd
, pg
);
1137 static void gen_bic_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1138 TCGv_vec pm
, TCGv_vec pg
)
1140 tcg_gen_andc_vec(vece
, pd
, pn
, pm
);
1141 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1144 static bool trans_BIC_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1146 static const GVecGen4 op
= {
1147 .fni8
= gen_bic_pg_i64
,
1148 .fniv
= gen_bic_pg_vec
,
1149 .fno
= gen_helper_sve_bic_pppp
,
1150 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1153 if (!a
->s
&& a
->pg
== a
->rn
) {
1154 if (sve_access_check(s
)) {
1155 gen_gvec_fn_ppp(s
, tcg_gen_gvec_andc
, a
->rd
, a
->rn
, a
->rm
);
1159 return do_pppp_flags(s
, a
, &op
);
1162 static void gen_eor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1164 tcg_gen_xor_i64(pd
, pn
, pm
);
1165 tcg_gen_and_i64(pd
, pd
, pg
);
1168 static void gen_eor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1169 TCGv_vec pm
, TCGv_vec pg
)
1171 tcg_gen_xor_vec(vece
, pd
, pn
, pm
);
1172 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1175 static bool trans_EOR_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1177 static const GVecGen4 op
= {
1178 .fni8
= gen_eor_pg_i64
,
1179 .fniv
= gen_eor_pg_vec
,
1180 .fno
= gen_helper_sve_eor_pppp
,
1181 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1183 return do_pppp_flags(s
, a
, &op
);
1186 static bool trans_SEL_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1191 if (sve_access_check(s
)) {
1192 unsigned psz
= pred_gvec_reg_size(s
);
1193 tcg_gen_gvec_bitsel(MO_8
, pred_full_reg_offset(s
, a
->rd
),
1194 pred_full_reg_offset(s
, a
->pg
),
1195 pred_full_reg_offset(s
, a
->rn
),
1196 pred_full_reg_offset(s
, a
->rm
), psz
, psz
);
1201 static void gen_orr_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1203 tcg_gen_or_i64(pd
, pn
, pm
);
1204 tcg_gen_and_i64(pd
, pd
, pg
);
1207 static void gen_orr_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1208 TCGv_vec pm
, TCGv_vec pg
)
1210 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1211 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1214 static bool trans_ORR_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1216 static const GVecGen4 op
= {
1217 .fni8
= gen_orr_pg_i64
,
1218 .fniv
= gen_orr_pg_vec
,
1219 .fno
= gen_helper_sve_orr_pppp
,
1220 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1223 if (!a
->s
&& a
->pg
== a
->rn
&& a
->rn
== a
->rm
) {
1224 return do_mov_p(s
, a
->rd
, a
->rn
);
1226 return do_pppp_flags(s
, a
, &op
);
1229 static void gen_orn_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1231 tcg_gen_orc_i64(pd
, pn
, pm
);
1232 tcg_gen_and_i64(pd
, pd
, pg
);
1235 static void gen_orn_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1236 TCGv_vec pm
, TCGv_vec pg
)
1238 tcg_gen_orc_vec(vece
, pd
, pn
, pm
);
1239 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1242 static bool trans_ORN_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1244 static const GVecGen4 op
= {
1245 .fni8
= gen_orn_pg_i64
,
1246 .fniv
= gen_orn_pg_vec
,
1247 .fno
= gen_helper_sve_orn_pppp
,
1248 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1250 return do_pppp_flags(s
, a
, &op
);
1253 static void gen_nor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1255 tcg_gen_or_i64(pd
, pn
, pm
);
1256 tcg_gen_andc_i64(pd
, pg
, pd
);
1259 static void gen_nor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1260 TCGv_vec pm
, TCGv_vec pg
)
1262 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1263 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1266 static bool trans_NOR_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1268 static const GVecGen4 op
= {
1269 .fni8
= gen_nor_pg_i64
,
1270 .fniv
= gen_nor_pg_vec
,
1271 .fno
= gen_helper_sve_nor_pppp
,
1272 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1274 return do_pppp_flags(s
, a
, &op
);
1277 static void gen_nand_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1279 tcg_gen_and_i64(pd
, pn
, pm
);
1280 tcg_gen_andc_i64(pd
, pg
, pd
);
1283 static void gen_nand_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1284 TCGv_vec pm
, TCGv_vec pg
)
1286 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1287 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1290 static bool trans_NAND_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1292 static const GVecGen4 op
= {
1293 .fni8
= gen_nand_pg_i64
,
1294 .fniv
= gen_nand_pg_vec
,
1295 .fno
= gen_helper_sve_nand_pppp
,
1296 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1298 return do_pppp_flags(s
, a
, &op
);
1302 *** SVE Predicate Misc Group
1305 static bool trans_PTEST(DisasContext
*s
, arg_PTEST
*a
)
1307 if (sve_access_check(s
)) {
1308 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1309 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1310 int words
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1313 TCGv_i64 pn
= tcg_temp_new_i64();
1314 TCGv_i64 pg
= tcg_temp_new_i64();
1316 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1317 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1318 do_predtest1(pn
, pg
);
1320 tcg_temp_free_i64(pn
);
1321 tcg_temp_free_i64(pg
);
1323 do_predtest(s
, nofs
, gofs
, words
);
/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
1367 /* This handles all of the predicate initialization instructions,
1368 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1369 * so that decode_pred_count returns 0. For SETFFR, we will have
1370 * set RD == 16 == FFR.
1372 static bool do_predset(DisasContext
*s
, int esz
, int rd
, int pat
, bool setflag
)
1374 if (!sve_access_check(s
)) {
1378 unsigned fullsz
= vec_full_reg_size(s
);
1379 unsigned ofs
= pred_full_reg_offset(s
, rd
);
1380 unsigned numelem
, setsz
, i
;
1381 uint64_t word
, lastword
;
1384 numelem
= decode_pred_count(fullsz
, pat
, esz
);
1386 /* Determine what we must store into each bit, and how many. */
1388 lastword
= word
= 0;
1391 setsz
= numelem
<< esz
;
1392 lastword
= word
= pred_esz_masks
[esz
];
1394 lastword
&= MAKE_64BIT_MASK(0, setsz
% 64);
1398 t
= tcg_temp_new_i64();
1400 tcg_gen_movi_i64(t
, lastword
);
1401 tcg_gen_st_i64(t
, cpu_env
, ofs
);
1405 if (word
== lastword
) {
1406 unsigned maxsz
= size_for_gvec(fullsz
/ 8);
1407 unsigned oprsz
= size_for_gvec(setsz
/ 8);
1409 if (oprsz
* 8 == setsz
) {
1410 tcg_gen_gvec_dup_imm(MO_64
, ofs
, oprsz
, maxsz
, word
);
1418 tcg_gen_movi_i64(t
, word
);
1419 for (i
= 0; i
< QEMU_ALIGN_DOWN(setsz
, 8); i
+= 8) {
1420 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1422 if (lastword
!= word
) {
1423 tcg_gen_movi_i64(t
, lastword
);
1424 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1428 tcg_gen_movi_i64(t
, 0);
1429 for (; i
< fullsz
; i
+= 8) {
1430 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1435 tcg_temp_free_i64(t
);
1439 tcg_gen_movi_i32(cpu_NF
, -(word
!= 0));
1440 tcg_gen_movi_i32(cpu_CF
, word
== 0);
1441 tcg_gen_movi_i32(cpu_VF
, 0);
1442 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
1447 static bool trans_PTRUE(DisasContext
*s
, arg_PTRUE
*a
)
1449 return do_predset(s
, a
->esz
, a
->rd
, a
->pat
, a
->s
);
1452 static bool trans_SETFFR(DisasContext
*s
, arg_SETFFR
*a
)
1454 /* Note pat == 31 is #all, to set all elements. */
1455 return do_predset(s
, 0, FFR_PRED_NUM
, 31, false);
1458 static bool trans_PFALSE(DisasContext
*s
, arg_PFALSE
*a
)
1460 /* Note pat == 32 is #unimp, to set no elements. */
1461 return do_predset(s
, 0, a
->rd
, 32, false);
1464 static bool trans_RDFFR_p(DisasContext
*s
, arg_RDFFR_p
*a
)
1466 /* The path through do_pppp_flags is complicated enough to want to avoid
1467 * duplication. Frob the arguments into the form of a predicated AND.
1469 arg_rprr_s alt_a
= {
1470 .rd
= a
->rd
, .pg
= a
->pg
, .s
= a
->s
,
1471 .rn
= FFR_PRED_NUM
, .rm
= FFR_PRED_NUM
,
1473 return trans_AND_pppp(s
, &alt_a
);
1476 static bool trans_RDFFR(DisasContext
*s
, arg_RDFFR
*a
)
1478 return do_mov_p(s
, a
->rd
, FFR_PRED_NUM
);
1481 static bool trans_WRFFR(DisasContext
*s
, arg_WRFFR
*a
)
1483 return do_mov_p(s
, FFR_PRED_NUM
, a
->rn
);
1486 static bool do_pfirst_pnext(DisasContext
*s
, arg_rr_esz
*a
,
1487 void (*gen_fn
)(TCGv_i32
, TCGv_ptr
,
1488 TCGv_ptr
, TCGv_i32
))
1490 if (!sve_access_check(s
)) {
1494 TCGv_ptr t_pd
= tcg_temp_new_ptr();
1495 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1499 desc
= FIELD_DP32(desc
, PREDDESC
, OPRSZ
, pred_full_reg_size(s
));
1500 desc
= FIELD_DP32(desc
, PREDDESC
, ESZ
, a
->esz
);
1502 tcg_gen_addi_ptr(t_pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
1503 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
1504 t
= tcg_const_i32(desc
);
1506 gen_fn(t
, t_pd
, t_pg
, t
);
1507 tcg_temp_free_ptr(t_pd
);
1508 tcg_temp_free_ptr(t_pg
);
1511 tcg_temp_free_i32(t
);
1515 static bool trans_PFIRST(DisasContext
*s
, arg_rr_esz
*a
)
1517 return do_pfirst_pnext(s
, a
, gen_helper_sve_pfirst
);
1520 static bool trans_PNEXT(DisasContext
*s
, arg_rr_esz
*a
)
1522 return do_pfirst_pnext(s
, a
, gen_helper_sve_pnext
);
1526 *** SVE Element Count Group
1529 /* Perform an inline saturating addition of a 32-bit value within
1530 * a 64-bit register. The second operand is known to be positive,
1531 * which halves the comparisions we must perform to bound the result.
1533 static void do_sat_addsub_32(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1539 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1541 tcg_gen_ext32u_i64(reg
, reg
);
1543 tcg_gen_ext32s_i64(reg
, reg
);
1546 tcg_gen_sub_i64(reg
, reg
, val
);
1547 ibound
= (u
? 0 : INT32_MIN
);
1550 tcg_gen_add_i64(reg
, reg
, val
);
1551 ibound
= (u
? UINT32_MAX
: INT32_MAX
);
1554 bound
= tcg_const_i64(ibound
);
1555 tcg_gen_movcond_i64(cond
, reg
, reg
, bound
, bound
, reg
);
1556 tcg_temp_free_i64(bound
);
1559 /* Similarly with 64-bit values. */
1560 static void do_sat_addsub_64(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1562 TCGv_i64 t0
= tcg_temp_new_i64();
1563 TCGv_i64 t1
= tcg_temp_new_i64();
1568 tcg_gen_sub_i64(t0
, reg
, val
);
1569 tcg_gen_movi_i64(t1
, 0);
1570 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, reg
, val
, t1
, t0
);
1572 tcg_gen_add_i64(t0
, reg
, val
);
1573 tcg_gen_movi_i64(t1
, -1);
1574 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, t0
, reg
, t1
, t0
);
1578 /* Detect signed overflow for subtraction. */
1579 tcg_gen_xor_i64(t0
, reg
, val
);
1580 tcg_gen_sub_i64(t1
, reg
, val
);
1581 tcg_gen_xor_i64(reg
, reg
, t1
);
1582 tcg_gen_and_i64(t0
, t0
, reg
);
1584 /* Bound the result. */
1585 tcg_gen_movi_i64(reg
, INT64_MIN
);
1586 t2
= tcg_const_i64(0);
1587 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, reg
, t1
);
1589 /* Detect signed overflow for addition. */
1590 tcg_gen_xor_i64(t0
, reg
, val
);
1591 tcg_gen_add_i64(reg
, reg
, val
);
1592 tcg_gen_xor_i64(t1
, reg
, val
);
1593 tcg_gen_andc_i64(t0
, t1
, t0
);
1595 /* Bound the result. */
1596 tcg_gen_movi_i64(t1
, INT64_MAX
);
1597 t2
= tcg_const_i64(0);
1598 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, t1
, reg
);
1600 tcg_temp_free_i64(t2
);
1602 tcg_temp_free_i64(t0
);
1603 tcg_temp_free_i64(t1
);
1606 /* Similarly with a vector and a scalar operand. */
1607 static void do_sat_addsub_vec(DisasContext
*s
, int esz
, int rd
, int rn
,
1608 TCGv_i64 val
, bool u
, bool d
)
1610 unsigned vsz
= vec_full_reg_size(s
);
1611 TCGv_ptr dptr
, nptr
;
1615 dptr
= tcg_temp_new_ptr();
1616 nptr
= tcg_temp_new_ptr();
1617 tcg_gen_addi_ptr(dptr
, cpu_env
, vec_full_reg_offset(s
, rd
));
1618 tcg_gen_addi_ptr(nptr
, cpu_env
, vec_full_reg_offset(s
, rn
));
1619 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1623 t32
= tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32
, val
);
1626 tcg_gen_neg_i32(t32
, t32
);
1629 gen_helper_sve_uqaddi_b(dptr
, nptr
, t32
, desc
);
1631 gen_helper_sve_sqaddi_b(dptr
, nptr
, t32
, desc
);
1633 tcg_temp_free_i32(t32
);
1637 t32
= tcg_temp_new_i32();
1638 tcg_gen_extrl_i64_i32(t32
, val
);
1640 tcg_gen_neg_i32(t32
, t32
);
1643 gen_helper_sve_uqaddi_h(dptr
, nptr
, t32
, desc
);
1645 gen_helper_sve_sqaddi_h(dptr
, nptr
, t32
, desc
);
1647 tcg_temp_free_i32(t32
);
1651 t64
= tcg_temp_new_i64();
1653 tcg_gen_neg_i64(t64
, val
);
1655 tcg_gen_mov_i64(t64
, val
);
1658 gen_helper_sve_uqaddi_s(dptr
, nptr
, t64
, desc
);
1660 gen_helper_sve_sqaddi_s(dptr
, nptr
, t64
, desc
);
1662 tcg_temp_free_i64(t64
);
1668 gen_helper_sve_uqsubi_d(dptr
, nptr
, val
, desc
);
1670 gen_helper_sve_uqaddi_d(dptr
, nptr
, val
, desc
);
1673 t64
= tcg_temp_new_i64();
1674 tcg_gen_neg_i64(t64
, val
);
1675 gen_helper_sve_sqaddi_d(dptr
, nptr
, t64
, desc
);
1676 tcg_temp_free_i64(t64
);
1678 gen_helper_sve_sqaddi_d(dptr
, nptr
, val
, desc
);
1683 g_assert_not_reached();
1686 tcg_temp_free_ptr(dptr
);
1687 tcg_temp_free_ptr(nptr
);
1688 tcg_temp_free_i32(desc
);
1691 static bool trans_CNT_r(DisasContext
*s
, arg_CNT_r
*a
)
1693 if (sve_access_check(s
)) {
1694 unsigned fullsz
= vec_full_reg_size(s
);
1695 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1696 tcg_gen_movi_i64(cpu_reg(s
, a
->rd
), numelem
* a
->imm
);
1701 static bool trans_INCDEC_r(DisasContext
*s
, arg_incdec_cnt
*a
)
1703 if (sve_access_check(s
)) {
1704 unsigned fullsz
= vec_full_reg_size(s
);
1705 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1706 int inc
= numelem
* a
->imm
* (a
->d
? -1 : 1);
1707 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1709 tcg_gen_addi_i64(reg
, reg
, inc
);
1714 static bool trans_SINCDEC_r_32(DisasContext
*s
, arg_incdec_cnt
*a
)
1716 if (!sve_access_check(s
)) {
1720 unsigned fullsz
= vec_full_reg_size(s
);
1721 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1722 int inc
= numelem
* a
->imm
;
1723 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1725 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1728 tcg_gen_ext32u_i64(reg
, reg
);
1730 tcg_gen_ext32s_i64(reg
, reg
);
1733 TCGv_i64 t
= tcg_const_i64(inc
);
1734 do_sat_addsub_32(reg
, t
, a
->u
, a
->d
);
1735 tcg_temp_free_i64(t
);
1740 static bool trans_SINCDEC_r_64(DisasContext
*s
, arg_incdec_cnt
*a
)
1742 if (!sve_access_check(s
)) {
1746 unsigned fullsz
= vec_full_reg_size(s
);
1747 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1748 int inc
= numelem
* a
->imm
;
1749 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1752 TCGv_i64 t
= tcg_const_i64(inc
);
1753 do_sat_addsub_64(reg
, t
, a
->u
, a
->d
);
1754 tcg_temp_free_i64(t
);
1759 static bool trans_INCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
)
1765 unsigned fullsz
= vec_full_reg_size(s
);
1766 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1767 int inc
= numelem
* a
->imm
;
1770 if (sve_access_check(s
)) {
1771 TCGv_i64 t
= tcg_const_i64(a
->d
? -inc
: inc
);
1772 tcg_gen_gvec_adds(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
1773 vec_full_reg_offset(s
, a
->rn
),
1775 tcg_temp_free_i64(t
);
1778 do_mov_z(s
, a
->rd
, a
->rn
);
1783 static bool trans_SINCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
)
1789 unsigned fullsz
= vec_full_reg_size(s
);
1790 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1791 int inc
= numelem
* a
->imm
;
1794 if (sve_access_check(s
)) {
1795 TCGv_i64 t
= tcg_const_i64(inc
);
1796 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, t
, a
->u
, a
->d
);
1797 tcg_temp_free_i64(t
);
1800 do_mov_z(s
, a
->rd
, a
->rn
);
1806 *** SVE Bitwise Immediate Group
1809 static bool do_zz_dbm(DisasContext
*s
, arg_rr_dbm
*a
, GVecGen2iFn
*gvec_fn
)
1812 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1813 extract32(a
->dbm
, 0, 6),
1814 extract32(a
->dbm
, 6, 6))) {
1817 if (sve_access_check(s
)) {
1818 unsigned vsz
= vec_full_reg_size(s
);
1819 gvec_fn(MO_64
, vec_full_reg_offset(s
, a
->rd
),
1820 vec_full_reg_offset(s
, a
->rn
), imm
, vsz
, vsz
);
1825 static bool trans_AND_zzi(DisasContext
*s
, arg_rr_dbm
*a
)
1827 return do_zz_dbm(s
, a
, tcg_gen_gvec_andi
);
1830 static bool trans_ORR_zzi(DisasContext
*s
, arg_rr_dbm
*a
)
1832 return do_zz_dbm(s
, a
, tcg_gen_gvec_ori
);
1835 static bool trans_EOR_zzi(DisasContext
*s
, arg_rr_dbm
*a
)
1837 return do_zz_dbm(s
, a
, tcg_gen_gvec_xori
);
1840 static bool trans_DUPM(DisasContext
*s
, arg_DUPM
*a
)
1843 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1844 extract32(a
->dbm
, 0, 6),
1845 extract32(a
->dbm
, 6, 6))) {
1848 if (sve_access_check(s
)) {
1849 do_dupi_z(s
, a
->rd
, imm
);
1855 *** SVE Integer Wide Immediate - Predicated Group
1858 /* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1861 static void do_cpy_m(DisasContext
*s
, int esz
, int rd
, int rn
, int pg
,
1864 typedef void gen_cpy(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
1865 static gen_cpy
* const fns
[4] = {
1866 gen_helper_sve_cpy_m_b
, gen_helper_sve_cpy_m_h
,
1867 gen_helper_sve_cpy_m_s
, gen_helper_sve_cpy_m_d
,
1869 unsigned vsz
= vec_full_reg_size(s
);
1870 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1871 TCGv_ptr t_zd
= tcg_temp_new_ptr();
1872 TCGv_ptr t_zn
= tcg_temp_new_ptr();
1873 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1875 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
1876 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, rn
));
1877 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
1879 fns
[esz
](t_zd
, t_zn
, t_pg
, val
, desc
);
1881 tcg_temp_free_ptr(t_zd
);
1882 tcg_temp_free_ptr(t_zn
);
1883 tcg_temp_free_ptr(t_pg
);
1884 tcg_temp_free_i32(desc
);
1887 static bool trans_FCPY(DisasContext
*s
, arg_FCPY
*a
)
1892 if (sve_access_check(s
)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm
= vfp_expand_imm(a
->esz
, a
->imm
);
1895 TCGv_i64 t_imm
= tcg_const_i64(imm
);
1896 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1897 tcg_temp_free_i64(t_imm
);
1902 static bool trans_CPY_m_i(DisasContext
*s
, arg_rpri_esz
*a
)
1904 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
1907 if (sve_access_check(s
)) {
1908 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1909 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1910 tcg_temp_free_i64(t_imm
);
1915 static bool trans_CPY_z_i(DisasContext
*s
, arg_CPY_z_i
*a
)
1917 static gen_helper_gvec_2i
* const fns
[4] = {
1918 gen_helper_sve_cpy_z_b
, gen_helper_sve_cpy_z_h
,
1919 gen_helper_sve_cpy_z_s
, gen_helper_sve_cpy_z_d
,
1922 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
1925 if (sve_access_check(s
)) {
1926 unsigned vsz
= vec_full_reg_size(s
);
1927 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
1929 pred_full_reg_offset(s
, a
->pg
),
1930 t_imm
, vsz
, vsz
, 0, fns
[a
->esz
]);
1931 tcg_temp_free_i64(t_imm
);
1937 *** SVE Permute Extract Group
1940 static bool trans_EXT(DisasContext
*s
, arg_EXT
*a
)
1942 if (!sve_access_check(s
)) {
1946 unsigned vsz
= vec_full_reg_size(s
);
1947 unsigned n_ofs
= a
->imm
>= vsz
? 0 : a
->imm
;
1948 unsigned n_siz
= vsz
- n_ofs
;
1949 unsigned d
= vec_full_reg_offset(s
, a
->rd
);
1950 unsigned n
= vec_full_reg_offset(s
, a
->rn
);
1951 unsigned m
= vec_full_reg_offset(s
, a
->rm
);
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1957 && n_ofs
== size_for_gvec(n_ofs
)
1958 && n_siz
== size_for_gvec(n_siz
)
1959 && (d
!= n
|| n_siz
<= n_ofs
)) {
1960 tcg_gen_gvec_mov(0, d
, n
+ n_ofs
, n_siz
, n_siz
);
1962 tcg_gen_gvec_mov(0, d
+ n_siz
, m
, n_ofs
, n_ofs
);
1965 tcg_gen_gvec_3_ool(d
, n
, m
, vsz
, vsz
, n_ofs
, gen_helper_sve_ext
);
1971 *** SVE Permute - Unpredicated Group
1974 static bool trans_DUP_s(DisasContext
*s
, arg_DUP_s
*a
)
1976 if (sve_access_check(s
)) {
1977 unsigned vsz
= vec_full_reg_size(s
);
1978 tcg_gen_gvec_dup_i64(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
1979 vsz
, vsz
, cpu_reg_sp(s
, a
->rn
));
1984 static bool trans_DUP_x(DisasContext
*s
, arg_DUP_x
*a
)
1986 if ((a
->imm
& 0x1f) == 0) {
1989 if (sve_access_check(s
)) {
1990 unsigned vsz
= vec_full_reg_size(s
);
1991 unsigned dofs
= vec_full_reg_offset(s
, a
->rd
);
1992 unsigned esz
, index
;
1994 esz
= ctz32(a
->imm
);
1995 index
= a
->imm
>> (esz
+ 1);
1997 if ((index
<< esz
) < vsz
) {
1998 unsigned nofs
= vec_reg_offset(s
, a
->rn
, index
, esz
);
1999 tcg_gen_gvec_dup_mem(esz
, dofs
, nofs
, vsz
, vsz
);
2002 * While dup_mem handles 128-bit elements, dup_imm does not.
2003 * Thankfully element size doesn't matter for splatting zero.
2005 tcg_gen_gvec_dup_imm(MO_64
, dofs
, vsz
, vsz
, 0);
2011 static void do_insr_i64(DisasContext
*s
, arg_rrr_esz
*a
, TCGv_i64 val
)
2013 typedef void gen_insr(TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
2014 static gen_insr
* const fns
[4] = {
2015 gen_helper_sve_insr_b
, gen_helper_sve_insr_h
,
2016 gen_helper_sve_insr_s
, gen_helper_sve_insr_d
,
2018 unsigned vsz
= vec_full_reg_size(s
);
2019 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2020 TCGv_ptr t_zd
= tcg_temp_new_ptr();
2021 TCGv_ptr t_zn
= tcg_temp_new_ptr();
2023 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, a
->rd
));
2024 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2026 fns
[a
->esz
](t_zd
, t_zn
, val
, desc
);
2028 tcg_temp_free_ptr(t_zd
);
2029 tcg_temp_free_ptr(t_zn
);
2030 tcg_temp_free_i32(desc
);
2033 static bool trans_INSR_f(DisasContext
*s
, arg_rrr_esz
*a
)
2035 if (sve_access_check(s
)) {
2036 TCGv_i64 t
= tcg_temp_new_i64();
2037 tcg_gen_ld_i64(t
, cpu_env
, vec_reg_offset(s
, a
->rm
, 0, MO_64
));
2038 do_insr_i64(s
, a
, t
);
2039 tcg_temp_free_i64(t
);
2044 static bool trans_INSR_r(DisasContext
*s
, arg_rrr_esz
*a
)
2046 if (sve_access_check(s
)) {
2047 do_insr_i64(s
, a
, cpu_reg(s
, a
->rm
));
2052 static bool trans_REV_v(DisasContext
*s
, arg_rr_esz
*a
)
2054 static gen_helper_gvec_2
* const fns
[4] = {
2055 gen_helper_sve_rev_b
, gen_helper_sve_rev_h
,
2056 gen_helper_sve_rev_s
, gen_helper_sve_rev_d
2059 if (sve_access_check(s
)) {
2060 gen_gvec_ool_zz(s
, fns
[a
->esz
], a
->rd
, a
->rn
, 0);
2065 static bool trans_TBL(DisasContext
*s
, arg_rrr_esz
*a
)
2067 static gen_helper_gvec_3
* const fns
[4] = {
2068 gen_helper_sve_tbl_b
, gen_helper_sve_tbl_h
,
2069 gen_helper_sve_tbl_s
, gen_helper_sve_tbl_d
2072 if (sve_access_check(s
)) {
2073 gen_gvec_ool_zzz(s
, fns
[a
->esz
], a
->rd
, a
->rn
, a
->rm
, 0);
2078 static bool trans_UNPK(DisasContext
*s
, arg_UNPK
*a
)
2080 static gen_helper_gvec_2
* const fns
[4][2] = {
2082 { gen_helper_sve_sunpk_h
, gen_helper_sve_uunpk_h
},
2083 { gen_helper_sve_sunpk_s
, gen_helper_sve_uunpk_s
},
2084 { gen_helper_sve_sunpk_d
, gen_helper_sve_uunpk_d
},
2090 if (sve_access_check(s
)) {
2091 unsigned vsz
= vec_full_reg_size(s
);
2092 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
2093 vec_full_reg_offset(s
, a
->rn
)
2094 + (a
->h
? vsz
/ 2 : 0),
2095 vsz
, vsz
, 0, fns
[a
->esz
][a
->u
]);
2101 *** SVE Permute - Predicates Group
2104 static bool do_perm_pred3(DisasContext
*s
, arg_rrr_esz
*a
, bool high_odd
,
2105 gen_helper_gvec_3
*fn
)
2107 if (!sve_access_check(s
)) {
2111 unsigned vsz
= pred_full_reg_size(s
);
2113 TCGv_ptr t_d
= tcg_temp_new_ptr();
2114 TCGv_ptr t_n
= tcg_temp_new_ptr();
2115 TCGv_ptr t_m
= tcg_temp_new_ptr();
2119 desc
= FIELD_DP32(desc
, PREDDESC
, OPRSZ
, vsz
);
2120 desc
= FIELD_DP32(desc
, PREDDESC
, ESZ
, a
->esz
);
2121 desc
= FIELD_DP32(desc
, PREDDESC
, DATA
, high_odd
);
2123 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2124 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2125 tcg_gen_addi_ptr(t_m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2126 t_desc
= tcg_const_i32(desc
);
2128 fn(t_d
, t_n
, t_m
, t_desc
);
2130 tcg_temp_free_ptr(t_d
);
2131 tcg_temp_free_ptr(t_n
);
2132 tcg_temp_free_ptr(t_m
);
2133 tcg_temp_free_i32(t_desc
);
2137 static bool do_perm_pred2(DisasContext
*s
, arg_rr_esz
*a
, bool high_odd
,
2138 gen_helper_gvec_2
*fn
)
2140 if (!sve_access_check(s
)) {
2144 unsigned vsz
= pred_full_reg_size(s
);
2145 TCGv_ptr t_d
= tcg_temp_new_ptr();
2146 TCGv_ptr t_n
= tcg_temp_new_ptr();
2150 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2151 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2153 desc
= FIELD_DP32(desc
, PREDDESC
, OPRSZ
, vsz
);
2154 desc
= FIELD_DP32(desc
, PREDDESC
, ESZ
, a
->esz
);
2155 desc
= FIELD_DP32(desc
, PREDDESC
, DATA
, high_odd
);
2156 t_desc
= tcg_const_i32(desc
);
2158 fn(t_d
, t_n
, t_desc
);
2160 tcg_temp_free_i32(t_desc
);
2161 tcg_temp_free_ptr(t_d
);
2162 tcg_temp_free_ptr(t_n
);
2166 static bool trans_ZIP1_p(DisasContext
*s
, arg_rrr_esz
*a
)
2168 return do_perm_pred3(s
, a
, 0, gen_helper_sve_zip_p
);
2171 static bool trans_ZIP2_p(DisasContext
*s
, arg_rrr_esz
*a
)
2173 return do_perm_pred3(s
, a
, 1, gen_helper_sve_zip_p
);
2176 static bool trans_UZP1_p(DisasContext
*s
, arg_rrr_esz
*a
)
2178 return do_perm_pred3(s
, a
, 0, gen_helper_sve_uzp_p
);
2181 static bool trans_UZP2_p(DisasContext
*s
, arg_rrr_esz
*a
)
2183 return do_perm_pred3(s
, a
, 1, gen_helper_sve_uzp_p
);
2186 static bool trans_TRN1_p(DisasContext
*s
, arg_rrr_esz
*a
)
2188 return do_perm_pred3(s
, a
, 0, gen_helper_sve_trn_p
);
2191 static bool trans_TRN2_p(DisasContext
*s
, arg_rrr_esz
*a
)
2193 return do_perm_pred3(s
, a
, 1, gen_helper_sve_trn_p
);
2196 static bool trans_REV_p(DisasContext
*s
, arg_rr_esz
*a
)
2198 return do_perm_pred2(s
, a
, 0, gen_helper_sve_rev_p
);
2201 static bool trans_PUNPKLO(DisasContext
*s
, arg_PUNPKLO
*a
)
2203 return do_perm_pred2(s
, a
, 0, gen_helper_sve_punpk_p
);
2206 static bool trans_PUNPKHI(DisasContext
*s
, arg_PUNPKHI
*a
)
2208 return do_perm_pred2(s
, a
, 1, gen_helper_sve_punpk_p
);
2212 *** SVE Permute - Interleaving Group
2215 static bool do_zip(DisasContext
*s
, arg_rrr_esz
*a
, bool high
)
2217 static gen_helper_gvec_3
* const fns
[4] = {
2218 gen_helper_sve_zip_b
, gen_helper_sve_zip_h
,
2219 gen_helper_sve_zip_s
, gen_helper_sve_zip_d
,
2222 if (sve_access_check(s
)) {
2223 unsigned vsz
= vec_full_reg_size(s
);
2224 unsigned high_ofs
= high
? vsz
/ 2 : 0;
2225 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2226 vec_full_reg_offset(s
, a
->rn
) + high_ofs
,
2227 vec_full_reg_offset(s
, a
->rm
) + high_ofs
,
2228 vsz
, vsz
, 0, fns
[a
->esz
]);
2233 static bool do_zzz_data_ool(DisasContext
*s
, arg_rrr_esz
*a
, int data
,
2234 gen_helper_gvec_3
*fn
)
2236 if (sve_access_check(s
)) {
2237 gen_gvec_ool_zzz(s
, fn
, a
->rd
, a
->rn
, a
->rm
, data
);
2242 static bool trans_ZIP1_z(DisasContext
*s
, arg_rrr_esz
*a
)
2244 return do_zip(s
, a
, false);
2247 static bool trans_ZIP2_z(DisasContext
*s
, arg_rrr_esz
*a
)
2249 return do_zip(s
, a
, true);
2252 static gen_helper_gvec_3
* const uzp_fns
[4] = {
2253 gen_helper_sve_uzp_b
, gen_helper_sve_uzp_h
,
2254 gen_helper_sve_uzp_s
, gen_helper_sve_uzp_d
,
2257 static bool trans_UZP1_z(DisasContext
*s
, arg_rrr_esz
*a
)
2259 return do_zzz_data_ool(s
, a
, 0, uzp_fns
[a
->esz
]);
2262 static bool trans_UZP2_z(DisasContext
*s
, arg_rrr_esz
*a
)
2264 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, uzp_fns
[a
->esz
]);
2267 static gen_helper_gvec_3
* const trn_fns
[4] = {
2268 gen_helper_sve_trn_b
, gen_helper_sve_trn_h
,
2269 gen_helper_sve_trn_s
, gen_helper_sve_trn_d
,
2272 static bool trans_TRN1_z(DisasContext
*s
, arg_rrr_esz
*a
)
2274 return do_zzz_data_ool(s
, a
, 0, trn_fns
[a
->esz
]);
2277 static bool trans_TRN2_z(DisasContext
*s
, arg_rrr_esz
*a
)
2279 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, trn_fns
[a
->esz
]);
2283 *** SVE Permute Vector - Predicated Group
2286 static bool trans_COMPACT(DisasContext
*s
, arg_rpr_esz
*a
)
2288 static gen_helper_gvec_3
* const fns
[4] = {
2289 NULL
, NULL
, gen_helper_sve_compact_s
, gen_helper_sve_compact_d
2291 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2294 /* Call the helper that computes the ARM LastActiveElement pseudocode
2295 * function, scaled by the element size. This includes the not found
2296 * indication; e.g. not found for esz=3 is -8.
2298 static void find_last_active(DisasContext
*s
, TCGv_i32 ret
, int esz
, int pg
)
2300 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2301 * round up, as we do elsewhere, because we need the exact size.
2303 TCGv_ptr t_p
= tcg_temp_new_ptr();
2305 unsigned vsz
= pred_full_reg_size(s
);
2309 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
2311 tcg_gen_addi_ptr(t_p
, cpu_env
, pred_full_reg_offset(s
, pg
));
2312 t_desc
= tcg_const_i32(desc
);
2314 gen_helper_sve_last_active_element(ret
, t_p
, t_desc
);
2316 tcg_temp_free_i32(t_desc
);
2317 tcg_temp_free_ptr(t_p
);
2320 /* Increment LAST to the offset of the next element in the vector,
2321 * wrapping around to 0.
2323 static void incr_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2325 unsigned vsz
= vec_full_reg_size(s
);
2327 tcg_gen_addi_i32(last
, last
, 1 << esz
);
2328 if (is_power_of_2(vsz
)) {
2329 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2331 TCGv_i32 max
= tcg_const_i32(vsz
);
2332 TCGv_i32 zero
= tcg_const_i32(0);
2333 tcg_gen_movcond_i32(TCG_COND_GEU
, last
, last
, max
, zero
, last
);
2334 tcg_temp_free_i32(max
);
2335 tcg_temp_free_i32(zero
);
2339 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2340 static void wrap_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2342 unsigned vsz
= vec_full_reg_size(s
);
2344 if (is_power_of_2(vsz
)) {
2345 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2347 TCGv_i32 max
= tcg_const_i32(vsz
- (1 << esz
));
2348 TCGv_i32 zero
= tcg_const_i32(0);
2349 tcg_gen_movcond_i32(TCG_COND_LT
, last
, last
, zero
, max
, last
);
2350 tcg_temp_free_i32(max
);
2351 tcg_temp_free_i32(zero
);
2355 /* Load an unsigned element of ESZ from BASE+OFS. */
2356 static TCGv_i64
load_esz(TCGv_ptr base
, int ofs
, int esz
)
2358 TCGv_i64 r
= tcg_temp_new_i64();
2362 tcg_gen_ld8u_i64(r
, base
, ofs
);
2365 tcg_gen_ld16u_i64(r
, base
, ofs
);
2368 tcg_gen_ld32u_i64(r
, base
, ofs
);
2371 tcg_gen_ld_i64(r
, base
, ofs
);
2374 g_assert_not_reached();
2379 /* Load an unsigned element of ESZ from RM[LAST]. */
2380 static TCGv_i64
load_last_active(DisasContext
*s
, TCGv_i32 last
,
2383 TCGv_ptr p
= tcg_temp_new_ptr();
2386 /* Convert offset into vector into offset into ENV.
2387 * The final adjustment for the vector register base
2388 * is added via constant offset to the load.
2390 #ifdef HOST_WORDS_BIGENDIAN
2391 /* Adjust for element ordering. See vec_reg_offset. */
2393 tcg_gen_xori_i32(last
, last
, 8 - (1 << esz
));
2396 tcg_gen_ext_i32_ptr(p
, last
);
2397 tcg_gen_add_ptr(p
, p
, cpu_env
);
2399 r
= load_esz(p
, vec_full_reg_offset(s
, rm
), esz
);
2400 tcg_temp_free_ptr(p
);
2405 /* Compute CLAST for a Zreg. */
2406 static bool do_clast_vector(DisasContext
*s
, arg_rprr_esz
*a
, bool before
)
2411 unsigned vsz
, esz
= a
->esz
;
2413 if (!sve_access_check(s
)) {
2417 last
= tcg_temp_local_new_i32();
2418 over
= gen_new_label();
2420 find_last_active(s
, last
, esz
, a
->pg
);
2422 /* There is of course no movcond for a 2048-bit vector,
2423 * so we must branch over the actual store.
2425 tcg_gen_brcondi_i32(TCG_COND_LT
, last
, 0, over
);
2428 incr_last_active(s
, last
, esz
);
2431 ele
= load_last_active(s
, last
, a
->rm
, esz
);
2432 tcg_temp_free_i32(last
);
2434 vsz
= vec_full_reg_size(s
);
2435 tcg_gen_gvec_dup_i64(esz
, vec_full_reg_offset(s
, a
->rd
), vsz
, vsz
, ele
);
2436 tcg_temp_free_i64(ele
);
2438 /* If this insn used MOVPRFX, we may need a second move. */
2439 if (a
->rd
!= a
->rn
) {
2440 TCGLabel
*done
= gen_new_label();
2443 gen_set_label(over
);
2444 do_mov_z(s
, a
->rd
, a
->rn
);
2446 gen_set_label(done
);
2448 gen_set_label(over
);
2453 static bool trans_CLASTA_z(DisasContext
*s
, arg_rprr_esz
*a
)
2455 return do_clast_vector(s
, a
, false);
2458 static bool trans_CLASTB_z(DisasContext
*s
, arg_rprr_esz
*a
)
2460 return do_clast_vector(s
, a
, true);
2463 /* Compute CLAST for a scalar. */
2464 static void do_clast_scalar(DisasContext
*s
, int esz
, int pg
, int rm
,
2465 bool before
, TCGv_i64 reg_val
)
2467 TCGv_i32 last
= tcg_temp_new_i32();
2468 TCGv_i64 ele
, cmp
, zero
;
2470 find_last_active(s
, last
, esz
, pg
);
2472 /* Extend the original value of last prior to incrementing. */
2473 cmp
= tcg_temp_new_i64();
2474 tcg_gen_ext_i32_i64(cmp
, last
);
2477 incr_last_active(s
, last
, esz
);
2480 /* The conceit here is that while last < 0 indicates not found, after
2481 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2482 * from which we can load garbage. We then discard the garbage with
2483 * a conditional move.
2485 ele
= load_last_active(s
, last
, rm
, esz
);
2486 tcg_temp_free_i32(last
);
2488 zero
= tcg_const_i64(0);
2489 tcg_gen_movcond_i64(TCG_COND_GE
, reg_val
, cmp
, zero
, ele
, reg_val
);
2491 tcg_temp_free_i64(zero
);
2492 tcg_temp_free_i64(cmp
);
2493 tcg_temp_free_i64(ele
);
2496 /* Compute CLAST for a Vreg. */
2497 static bool do_clast_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2499 if (sve_access_check(s
)) {
2501 int ofs
= vec_reg_offset(s
, a
->rd
, 0, esz
);
2502 TCGv_i64 reg
= load_esz(cpu_env
, ofs
, esz
);
2504 do_clast_scalar(s
, esz
, a
->pg
, a
->rn
, before
, reg
);
2505 write_fp_dreg(s
, a
->rd
, reg
);
2506 tcg_temp_free_i64(reg
);
2511 static bool trans_CLASTA_v(DisasContext
*s
, arg_rpr_esz
*a
)
2513 return do_clast_fp(s
, a
, false);
2516 static bool trans_CLASTB_v(DisasContext
*s
, arg_rpr_esz
*a
)
2518 return do_clast_fp(s
, a
, true);
2521 /* Compute CLAST for a Xreg. */
2522 static bool do_clast_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2526 if (!sve_access_check(s
)) {
2530 reg
= cpu_reg(s
, a
->rd
);
2533 tcg_gen_ext8u_i64(reg
, reg
);
2536 tcg_gen_ext16u_i64(reg
, reg
);
2539 tcg_gen_ext32u_i64(reg
, reg
);
2544 g_assert_not_reached();
2547 do_clast_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
, reg
);
2551 static bool trans_CLASTA_r(DisasContext
*s
, arg_rpr_esz
*a
)
2553 return do_clast_general(s
, a
, false);
2556 static bool trans_CLASTB_r(DisasContext
*s
, arg_rpr_esz
*a
)
2558 return do_clast_general(s
, a
, true);
2561 /* Compute LAST for a scalar. */
2562 static TCGv_i64
do_last_scalar(DisasContext
*s
, int esz
,
2563 int pg
, int rm
, bool before
)
2565 TCGv_i32 last
= tcg_temp_new_i32();
2568 find_last_active(s
, last
, esz
, pg
);
2570 wrap_last_active(s
, last
, esz
);
2572 incr_last_active(s
, last
, esz
);
2575 ret
= load_last_active(s
, last
, rm
, esz
);
2576 tcg_temp_free_i32(last
);
2580 /* Compute LAST for a Vreg. */
2581 static bool do_last_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2583 if (sve_access_check(s
)) {
2584 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2585 write_fp_dreg(s
, a
->rd
, val
);
2586 tcg_temp_free_i64(val
);
2591 static bool trans_LASTA_v(DisasContext
*s
, arg_rpr_esz
*a
)
2593 return do_last_fp(s
, a
, false);
2596 static bool trans_LASTB_v(DisasContext
*s
, arg_rpr_esz
*a
)
2598 return do_last_fp(s
, a
, true);
2601 /* Compute LAST for a Xreg. */
2602 static bool do_last_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2604 if (sve_access_check(s
)) {
2605 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2606 tcg_gen_mov_i64(cpu_reg(s
, a
->rd
), val
);
2607 tcg_temp_free_i64(val
);
2612 static bool trans_LASTA_r(DisasContext
*s
, arg_rpr_esz
*a
)
2614 return do_last_general(s
, a
, false);
2617 static bool trans_LASTB_r(DisasContext
*s
, arg_rpr_esz
*a
)
2619 return do_last_general(s
, a
, true);
2622 static bool trans_CPY_m_r(DisasContext
*s
, arg_rpr_esz
*a
)
2624 if (sve_access_check(s
)) {
2625 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, cpu_reg_sp(s
, a
->rn
));
2630 static bool trans_CPY_m_v(DisasContext
*s
, arg_rpr_esz
*a
)
2632 if (sve_access_check(s
)) {
2633 int ofs
= vec_reg_offset(s
, a
->rn
, 0, a
->esz
);
2634 TCGv_i64 t
= load_esz(cpu_env
, ofs
, a
->esz
);
2635 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, t
);
2636 tcg_temp_free_i64(t
);
2641 static bool trans_REVB(DisasContext
*s
, arg_rpr_esz
*a
)
2643 static gen_helper_gvec_3
* const fns
[4] = {
2645 gen_helper_sve_revb_h
,
2646 gen_helper_sve_revb_s
,
2647 gen_helper_sve_revb_d
,
2649 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2652 static bool trans_REVH(DisasContext
*s
, arg_rpr_esz
*a
)
2654 static gen_helper_gvec_3
* const fns
[4] = {
2657 gen_helper_sve_revh_s
,
2658 gen_helper_sve_revh_d
,
2660 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2663 static bool trans_REVW(DisasContext
*s
, arg_rpr_esz
*a
)
2665 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_revw_d
: NULL
);
2668 static bool trans_RBIT(DisasContext
*s
, arg_rpr_esz
*a
)
2670 static gen_helper_gvec_3
* const fns
[4] = {
2671 gen_helper_sve_rbit_b
,
2672 gen_helper_sve_rbit_h
,
2673 gen_helper_sve_rbit_s
,
2674 gen_helper_sve_rbit_d
,
2676 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2679 static bool trans_SPLICE(DisasContext
*s
, arg_rprr_esz
*a
)
2681 if (sve_access_check(s
)) {
2682 gen_gvec_ool_zzzp(s
, gen_helper_sve_splice
,
2683 a
->rd
, a
->rn
, a
->rm
, a
->pg
, a
->esz
);
2689 *** SVE Integer Compare - Vectors Group
2692 static bool do_ppzz_flags(DisasContext
*s
, arg_rprr_esz
*a
,
2693 gen_helper_gvec_flags_4
*gen_fn
)
2695 TCGv_ptr pd
, zn
, zm
, pg
;
2699 if (gen_fn
== NULL
) {
2702 if (!sve_access_check(s
)) {
2706 vsz
= vec_full_reg_size(s
);
2707 t
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2708 pd
= tcg_temp_new_ptr();
2709 zn
= tcg_temp_new_ptr();
2710 zm
= tcg_temp_new_ptr();
2711 pg
= tcg_temp_new_ptr();
2713 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2714 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2715 tcg_gen_addi_ptr(zm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
2716 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2718 gen_fn(t
, pd
, zn
, zm
, pg
, t
);
2720 tcg_temp_free_ptr(pd
);
2721 tcg_temp_free_ptr(zn
);
2722 tcg_temp_free_ptr(zm
);
2723 tcg_temp_free_ptr(pg
);
2727 tcg_temp_free_i32(t
);
2731 #define DO_PPZZ(NAME, name) \
2732 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
2734 static gen_helper_gvec_flags_4 * const fns[4] = { \
2735 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2736 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2738 return do_ppzz_flags(s, a, fns[a->esz]); \
2741 DO_PPZZ(CMPEQ
, cmpeq
)
2742 DO_PPZZ(CMPNE
, cmpne
)
2743 DO_PPZZ(CMPGT
, cmpgt
)
2744 DO_PPZZ(CMPGE
, cmpge
)
2745 DO_PPZZ(CMPHI
, cmphi
)
2746 DO_PPZZ(CMPHS
, cmphs
)
2750 #define DO_PPZW(NAME, name) \
2751 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
2753 static gen_helper_gvec_flags_4 * const fns[4] = { \
2754 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2755 gen_helper_sve_##name##_ppzw_s, NULL \
2757 return do_ppzz_flags(s, a, fns[a->esz]); \
2760 DO_PPZW(CMPEQ
, cmpeq
)
2761 DO_PPZW(CMPNE
, cmpne
)
2762 DO_PPZW(CMPGT
, cmpgt
)
2763 DO_PPZW(CMPGE
, cmpge
)
2764 DO_PPZW(CMPHI
, cmphi
)
2765 DO_PPZW(CMPHS
, cmphs
)
2766 DO_PPZW(CMPLT
, cmplt
)
2767 DO_PPZW(CMPLE
, cmple
)
2768 DO_PPZW(CMPLO
, cmplo
)
2769 DO_PPZW(CMPLS
, cmpls
)
2774 *** SVE Integer Compare - Immediate Groups
2777 static bool do_ppzi_flags(DisasContext
*s
, arg_rpri_esz
*a
,
2778 gen_helper_gvec_flags_3
*gen_fn
)
2780 TCGv_ptr pd
, zn
, pg
;
2784 if (gen_fn
== NULL
) {
2787 if (!sve_access_check(s
)) {
2791 vsz
= vec_full_reg_size(s
);
2792 t
= tcg_const_i32(simd_desc(vsz
, vsz
, a
->imm
));
2793 pd
= tcg_temp_new_ptr();
2794 zn
= tcg_temp_new_ptr();
2795 pg
= tcg_temp_new_ptr();
2797 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2798 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2799 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2801 gen_fn(t
, pd
, zn
, pg
, t
);
2803 tcg_temp_free_ptr(pd
);
2804 tcg_temp_free_ptr(zn
);
2805 tcg_temp_free_ptr(pg
);
2809 tcg_temp_free_i32(t
);
2813 #define DO_PPZI(NAME, name) \
2814 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
2816 static gen_helper_gvec_flags_3 * const fns[4] = { \
2817 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2818 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2820 return do_ppzi_flags(s, a, fns[a->esz]); \
2823 DO_PPZI(CMPEQ
, cmpeq
)
2824 DO_PPZI(CMPNE
, cmpne
)
2825 DO_PPZI(CMPGT
, cmpgt
)
2826 DO_PPZI(CMPGE
, cmpge
)
2827 DO_PPZI(CMPHI
, cmphi
)
2828 DO_PPZI(CMPHS
, cmphs
)
2829 DO_PPZI(CMPLT
, cmplt
)
2830 DO_PPZI(CMPLE
, cmple
)
2831 DO_PPZI(CMPLO
, cmplo
)
2832 DO_PPZI(CMPLS
, cmpls
)
2837 *** SVE Partition Break Group
2840 static bool do_brk3(DisasContext
*s
, arg_rprr_s
*a
,
2841 gen_helper_gvec_4
*fn
, gen_helper_gvec_flags_4
*fn_s
)
2843 if (!sve_access_check(s
)) {
2847 unsigned vsz
= pred_full_reg_size(s
);
2849 /* Predicate sizes may be smaller and cannot use simd_desc. */
2850 TCGv_ptr d
= tcg_temp_new_ptr();
2851 TCGv_ptr n
= tcg_temp_new_ptr();
2852 TCGv_ptr m
= tcg_temp_new_ptr();
2853 TCGv_ptr g
= tcg_temp_new_ptr();
2854 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2856 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2857 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2858 tcg_gen_addi_ptr(m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2859 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2862 fn_s(t
, d
, n
, m
, g
, t
);
2867 tcg_temp_free_ptr(d
);
2868 tcg_temp_free_ptr(n
);
2869 tcg_temp_free_ptr(m
);
2870 tcg_temp_free_ptr(g
);
2871 tcg_temp_free_i32(t
);
2875 static bool do_brk2(DisasContext
*s
, arg_rpr_s
*a
,
2876 gen_helper_gvec_3
*fn
, gen_helper_gvec_flags_3
*fn_s
)
2878 if (!sve_access_check(s
)) {
2882 unsigned vsz
= pred_full_reg_size(s
);
2884 /* Predicate sizes may be smaller and cannot use simd_desc. */
2885 TCGv_ptr d
= tcg_temp_new_ptr();
2886 TCGv_ptr n
= tcg_temp_new_ptr();
2887 TCGv_ptr g
= tcg_temp_new_ptr();
2888 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2890 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2891 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2892 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2895 fn_s(t
, d
, n
, g
, t
);
2900 tcg_temp_free_ptr(d
);
2901 tcg_temp_free_ptr(n
);
2902 tcg_temp_free_ptr(g
);
2903 tcg_temp_free_i32(t
);
2907 static bool trans_BRKPA(DisasContext
*s
, arg_rprr_s
*a
)
2909 return do_brk3(s
, a
, gen_helper_sve_brkpa
, gen_helper_sve_brkpas
);
2912 static bool trans_BRKPB(DisasContext
*s
, arg_rprr_s
*a
)
2914 return do_brk3(s
, a
, gen_helper_sve_brkpb
, gen_helper_sve_brkpbs
);
2917 static bool trans_BRKA_m(DisasContext
*s
, arg_rpr_s
*a
)
2919 return do_brk2(s
, a
, gen_helper_sve_brka_m
, gen_helper_sve_brkas_m
);
2922 static bool trans_BRKB_m(DisasContext
*s
, arg_rpr_s
*a
)
2924 return do_brk2(s
, a
, gen_helper_sve_brkb_m
, gen_helper_sve_brkbs_m
);
2927 static bool trans_BRKA_z(DisasContext
*s
, arg_rpr_s
*a
)
2929 return do_brk2(s
, a
, gen_helper_sve_brka_z
, gen_helper_sve_brkas_z
);
2932 static bool trans_BRKB_z(DisasContext
*s
, arg_rpr_s
*a
)
2934 return do_brk2(s
, a
, gen_helper_sve_brkb_z
, gen_helper_sve_brkbs_z
);
2937 static bool trans_BRKN(DisasContext
*s
, arg_rpr_s
*a
)
2939 return do_brk2(s
, a
, gen_helper_sve_brkn
, gen_helper_sve_brkns
);
2943 *** SVE Predicate Count Group
2946 static void do_cntp(DisasContext
*s
, TCGv_i64 val
, int esz
, int pn
, int pg
)
2948 unsigned psz
= pred_full_reg_size(s
);
2953 tcg_gen_ld_i64(val
, cpu_env
, pred_full_reg_offset(s
, pn
));
2955 TCGv_i64 g
= tcg_temp_new_i64();
2956 tcg_gen_ld_i64(g
, cpu_env
, pred_full_reg_offset(s
, pg
));
2957 tcg_gen_and_i64(val
, val
, g
);
2958 tcg_temp_free_i64(g
);
2961 /* Reduce the pred_esz_masks value simply to reduce the
2962 * size of the code generated here.
2964 psz_mask
= MAKE_64BIT_MASK(0, psz
* 8);
2965 tcg_gen_andi_i64(val
, val
, pred_esz_masks
[esz
] & psz_mask
);
2967 tcg_gen_ctpop_i64(val
, val
);
2969 TCGv_ptr t_pn
= tcg_temp_new_ptr();
2970 TCGv_ptr t_pg
= tcg_temp_new_ptr();
2975 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
2977 tcg_gen_addi_ptr(t_pn
, cpu_env
, pred_full_reg_offset(s
, pn
));
2978 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
2979 t_desc
= tcg_const_i32(desc
);
2981 gen_helper_sve_cntp(val
, t_pn
, t_pg
, t_desc
);
2982 tcg_temp_free_ptr(t_pn
);
2983 tcg_temp_free_ptr(t_pg
);
2984 tcg_temp_free_i32(t_desc
);
2988 static bool trans_CNTP(DisasContext
*s
, arg_CNTP
*a
)
2990 if (sve_access_check(s
)) {
2991 do_cntp(s
, cpu_reg(s
, a
->rd
), a
->esz
, a
->rn
, a
->pg
);
2996 static bool trans_INCDECP_r(DisasContext
*s
, arg_incdec_pred
*a
)
2998 if (sve_access_check(s
)) {
2999 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3000 TCGv_i64 val
= tcg_temp_new_i64();
3002 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3004 tcg_gen_sub_i64(reg
, reg
, val
);
3006 tcg_gen_add_i64(reg
, reg
, val
);
3008 tcg_temp_free_i64(val
);
3013 static bool trans_INCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
)
3018 if (sve_access_check(s
)) {
3019 unsigned vsz
= vec_full_reg_size(s
);
3020 TCGv_i64 val
= tcg_temp_new_i64();
3021 GVecGen2sFn
*gvec_fn
= a
->d
? tcg_gen_gvec_subs
: tcg_gen_gvec_adds
;
3023 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3024 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3025 vec_full_reg_offset(s
, a
->rn
), val
, vsz
, vsz
);
3030 static bool trans_SINCDECP_r_32(DisasContext
*s
, arg_incdec_pred
*a
)
3032 if (sve_access_check(s
)) {
3033 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3034 TCGv_i64 val
= tcg_temp_new_i64();
3036 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3037 do_sat_addsub_32(reg
, val
, a
->u
, a
->d
);
3042 static bool trans_SINCDECP_r_64(DisasContext
*s
, arg_incdec_pred
*a
)
3044 if (sve_access_check(s
)) {
3045 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3046 TCGv_i64 val
= tcg_temp_new_i64();
3048 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3049 do_sat_addsub_64(reg
, val
, a
->u
, a
->d
);
3054 static bool trans_SINCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
)
3059 if (sve_access_check(s
)) {
3060 TCGv_i64 val
= tcg_temp_new_i64();
3061 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3062 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, a
->u
, a
->d
);
3068 *** SVE Integer Compare Scalars Group
3071 static bool trans_CTERM(DisasContext
*s
, arg_CTERM
*a
)
3073 if (!sve_access_check(s
)) {
3077 TCGCond cond
= (a
->ne
? TCG_COND_NE
: TCG_COND_EQ
);
3078 TCGv_i64 rn
= read_cpu_reg(s
, a
->rn
, a
->sf
);
3079 TCGv_i64 rm
= read_cpu_reg(s
, a
->rm
, a
->sf
);
3080 TCGv_i64 cmp
= tcg_temp_new_i64();
3082 tcg_gen_setcond_i64(cond
, cmp
, rn
, rm
);
3083 tcg_gen_extrl_i64_i32(cpu_NF
, cmp
);
3084 tcg_temp_free_i64(cmp
);
3086 /* VF = !NF & !CF. */
3087 tcg_gen_xori_i32(cpu_VF
, cpu_NF
, 1);
3088 tcg_gen_andc_i32(cpu_VF
, cpu_VF
, cpu_CF
);
3090 /* Both NF and VF actually look at bit 31. */
3091 tcg_gen_neg_i32(cpu_NF
, cpu_NF
);
3092 tcg_gen_neg_i32(cpu_VF
, cpu_VF
);
3096 static bool trans_WHILE(DisasContext
*s
, arg_WHILE
*a
)
3098 TCGv_i64 op0
, op1
, t0
, t1
, tmax
;
3101 unsigned desc
, vsz
= vec_full_reg_size(s
);
3104 if (!sve_access_check(s
)) {
3108 op0
= read_cpu_reg(s
, a
->rn
, 1);
3109 op1
= read_cpu_reg(s
, a
->rm
, 1);
3113 tcg_gen_ext32u_i64(op0
, op0
);
3114 tcg_gen_ext32u_i64(op1
, op1
);
3116 tcg_gen_ext32s_i64(op0
, op0
);
3117 tcg_gen_ext32s_i64(op1
, op1
);
3121 /* For the helper, compress the different conditions into a computation
3122 * of how many iterations for which the condition is true.
3124 t0
= tcg_temp_new_i64();
3125 t1
= tcg_temp_new_i64();
3126 tcg_gen_sub_i64(t0
, op1
, op0
);
3128 tmax
= tcg_const_i64(vsz
>> a
->esz
);
3130 /* Equality means one more iteration. */
3131 tcg_gen_addi_i64(t0
, t0
, 1);
3133 /* If op1 is max (un)signed integer (and the only time the addition
3134 * above could overflow), then we produce an all-true predicate by
3135 * setting the count to the vector length. This is because the
3136 * pseudocode is described as an increment + compare loop, and the
3137 * max integer would always compare true.
3139 tcg_gen_movi_i64(t1
, (a
->sf
3140 ? (a
->u
? UINT64_MAX
: INT64_MAX
)
3141 : (a
->u
? UINT32_MAX
: INT32_MAX
)));
3142 tcg_gen_movcond_i64(TCG_COND_EQ
, t0
, op1
, t1
, tmax
, t0
);
3145 /* Bound to the maximum. */
3146 tcg_gen_umin_i64(t0
, t0
, tmax
);
3147 tcg_temp_free_i64(tmax
);
3149 /* Set the count to zero if the condition is false. */
3151 ? (a
->eq
? TCG_COND_LEU
: TCG_COND_LTU
)
3152 : (a
->eq
? TCG_COND_LE
: TCG_COND_LT
));
3153 tcg_gen_movi_i64(t1
, 0);
3154 tcg_gen_movcond_i64(cond
, t0
, op0
, op1
, t0
, t1
);
3155 tcg_temp_free_i64(t1
);
3157 /* Since we're bounded, pass as a 32-bit type. */
3158 t2
= tcg_temp_new_i32();
3159 tcg_gen_extrl_i64_i32(t2
, t0
);
3160 tcg_temp_free_i64(t0
);
3162 /* Scale elements to bits. */
3163 tcg_gen_shli_i32(t2
, t2
, a
->esz
);
3165 desc
= (vsz
/ 8) - 2;
3166 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
3167 t3
= tcg_const_i32(desc
);
3169 ptr
= tcg_temp_new_ptr();
3170 tcg_gen_addi_ptr(ptr
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
3172 gen_helper_sve_while(t2
, ptr
, t2
, t3
);
3175 tcg_temp_free_ptr(ptr
);
3176 tcg_temp_free_i32(t2
);
3177 tcg_temp_free_i32(t3
);
3182 *** SVE Integer Wide Immediate - Unpredicated Group
3185 static bool trans_FDUP(DisasContext
*s
, arg_FDUP
*a
)
3190 if (sve_access_check(s
)) {
3191 unsigned vsz
= vec_full_reg_size(s
);
3192 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3195 /* Decode the VFP immediate. */
3196 imm
= vfp_expand_imm(a
->esz
, a
->imm
);
3197 tcg_gen_gvec_dup_imm(a
->esz
, dofs
, vsz
, vsz
, imm
);
3202 static bool trans_DUP_i(DisasContext
*s
, arg_DUP_i
*a
)
3204 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3207 if (sve_access_check(s
)) {
3208 unsigned vsz
= vec_full_reg_size(s
);
3209 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3211 tcg_gen_gvec_dup_imm(a
->esz
, dofs
, vsz
, vsz
, a
->imm
);
3216 static bool trans_ADD_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3218 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3221 if (sve_access_check(s
)) {
3222 unsigned vsz
= vec_full_reg_size(s
);
3223 tcg_gen_gvec_addi(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3224 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3229 static bool trans_SUB_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3232 return trans_ADD_zzi(s
, a
);
3235 static bool trans_SUBR_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3237 static const TCGOpcode vecop_list
[] = { INDEX_op_sub_vec
, 0 };
3238 static const GVecGen2s op
[4] = {
3239 { .fni8
= tcg_gen_vec_sub8_i64
,
3240 .fniv
= tcg_gen_sub_vec
,
3241 .fno
= gen_helper_sve_subri_b
,
3242 .opt_opc
= vecop_list
,
3244 .scalar_first
= true },
3245 { .fni8
= tcg_gen_vec_sub16_i64
,
3246 .fniv
= tcg_gen_sub_vec
,
3247 .fno
= gen_helper_sve_subri_h
,
3248 .opt_opc
= vecop_list
,
3250 .scalar_first
= true },
3251 { .fni4
= tcg_gen_sub_i32
,
3252 .fniv
= tcg_gen_sub_vec
,
3253 .fno
= gen_helper_sve_subri_s
,
3254 .opt_opc
= vecop_list
,
3256 .scalar_first
= true },
3257 { .fni8
= tcg_gen_sub_i64
,
3258 .fniv
= tcg_gen_sub_vec
,
3259 .fno
= gen_helper_sve_subri_d
,
3260 .opt_opc
= vecop_list
,
3261 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
3263 .scalar_first
= true }
3266 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3269 if (sve_access_check(s
)) {
3270 unsigned vsz
= vec_full_reg_size(s
);
3271 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3272 tcg_gen_gvec_2s(vec_full_reg_offset(s
, a
->rd
),
3273 vec_full_reg_offset(s
, a
->rn
),
3274 vsz
, vsz
, c
, &op
[a
->esz
]);
3275 tcg_temp_free_i64(c
);
3280 static bool trans_MUL_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3282 if (sve_access_check(s
)) {
3283 unsigned vsz
= vec_full_reg_size(s
);
3284 tcg_gen_gvec_muli(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3285 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3290 static bool do_zzi_sat(DisasContext
*s
, arg_rri_esz
*a
, bool u
, bool d
)
3292 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3295 if (sve_access_check(s
)) {
3296 TCGv_i64 val
= tcg_const_i64(a
->imm
);
3297 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, u
, d
);
3298 tcg_temp_free_i64(val
);
3303 static bool trans_SQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3305 return do_zzi_sat(s
, a
, false, false);
3308 static bool trans_UQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3310 return do_zzi_sat(s
, a
, true, false);
3313 static bool trans_SQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3315 return do_zzi_sat(s
, a
, false, true);
3318 static bool trans_UQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3320 return do_zzi_sat(s
, a
, true, true);
3323 static bool do_zzi_ool(DisasContext
*s
, arg_rri_esz
*a
, gen_helper_gvec_2i
*fn
)
3325 if (sve_access_check(s
)) {
3326 unsigned vsz
= vec_full_reg_size(s
);
3327 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3329 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
3330 vec_full_reg_offset(s
, a
->rn
),
3331 c
, vsz
, vsz
, 0, fn
);
3332 tcg_temp_free_i64(c
);
3337 #define DO_ZZI(NAME, name) \
3338 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
3340 static gen_helper_gvec_2i * const fns[4] = { \
3341 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3342 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3344 return do_zzi_ool(s, a, fns[a->esz]); \
3354 static bool trans_DOT_zzz(DisasContext
*s
, arg_DOT_zzz
*a
)
3356 static gen_helper_gvec_3
* const fns
[2][2] = {
3357 { gen_helper_gvec_sdot_b
, gen_helper_gvec_sdot_h
},
3358 { gen_helper_gvec_udot_b
, gen_helper_gvec_udot_h
}
3361 if (sve_access_check(s
)) {
3362 gen_gvec_ool_zzz(s
, fns
[a
->u
][a
->sz
], a
->rd
, a
->rn
, a
->rm
, 0);
3367 static bool trans_DOT_zzx(DisasContext
*s
, arg_DOT_zzx
*a
)
3369 static gen_helper_gvec_3
* const fns
[2][2] = {
3370 { gen_helper_gvec_sdot_idx_b
, gen_helper_gvec_sdot_idx_h
},
3371 { gen_helper_gvec_udot_idx_b
, gen_helper_gvec_udot_idx_h
}
3374 if (sve_access_check(s
)) {
3375 gen_gvec_ool_zzz(s
, fns
[a
->u
][a
->sz
], a
->rd
, a
->rn
, a
->rm
, a
->index
);
3382 *** SVE Floating Point Multiply-Add Indexed Group
3385 static bool trans_FMLA_zzxz(DisasContext
*s
, arg_FMLA_zzxz
*a
)
3387 static gen_helper_gvec_4_ptr
* const fns
[3] = {
3388 gen_helper_gvec_fmla_idx_h
,
3389 gen_helper_gvec_fmla_idx_s
,
3390 gen_helper_gvec_fmla_idx_d
,
3393 if (sve_access_check(s
)) {
3394 unsigned vsz
= vec_full_reg_size(s
);
3395 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3396 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3397 vec_full_reg_offset(s
, a
->rn
),
3398 vec_full_reg_offset(s
, a
->rm
),
3399 vec_full_reg_offset(s
, a
->ra
),
3400 status
, vsz
, vsz
, (a
->index
<< 1) | a
->sub
,
3402 tcg_temp_free_ptr(status
);
3408 *** SVE Floating Point Multiply Indexed Group
3411 static bool trans_FMUL_zzx(DisasContext
*s
, arg_FMUL_zzx
*a
)
3413 static gen_helper_gvec_3_ptr
* const fns
[3] = {
3414 gen_helper_gvec_fmul_idx_h
,
3415 gen_helper_gvec_fmul_idx_s
,
3416 gen_helper_gvec_fmul_idx_d
,
3419 if (sve_access_check(s
)) {
3420 unsigned vsz
= vec_full_reg_size(s
);
3421 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3422 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3423 vec_full_reg_offset(s
, a
->rn
),
3424 vec_full_reg_offset(s
, a
->rm
),
3425 status
, vsz
, vsz
, a
->index
, fns
[a
->esz
- 1]);
3426 tcg_temp_free_ptr(status
);
3432 *** SVE Floating Point Fast Reduction Group
3435 typedef void gen_helper_fp_reduce(TCGv_i64
, TCGv_ptr
, TCGv_ptr
,
3436 TCGv_ptr
, TCGv_i32
);
3438 static void do_reduce(DisasContext
*s
, arg_rpr_esz
*a
,
3439 gen_helper_fp_reduce
*fn
)
3441 unsigned vsz
= vec_full_reg_size(s
);
3442 unsigned p2vsz
= pow2ceil(vsz
);
3443 TCGv_i32 t_desc
= tcg_const_i32(simd_desc(vsz
, p2vsz
, 0));
3444 TCGv_ptr t_zn
, t_pg
, status
;
3447 temp
= tcg_temp_new_i64();
3448 t_zn
= tcg_temp_new_ptr();
3449 t_pg
= tcg_temp_new_ptr();
3451 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
3452 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3453 status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3455 fn(temp
, t_zn
, t_pg
, status
, t_desc
);
3456 tcg_temp_free_ptr(t_zn
);
3457 tcg_temp_free_ptr(t_pg
);
3458 tcg_temp_free_ptr(status
);
3459 tcg_temp_free_i32(t_desc
);
3461 write_fp_dreg(s
, a
->rd
, temp
);
3462 tcg_temp_free_i64(temp
);
3465 #define DO_VPZ(NAME, name) \
3466 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3468 static gen_helper_fp_reduce * const fns[3] = { \
3469 gen_helper_sve_##name##_h, \
3470 gen_helper_sve_##name##_s, \
3471 gen_helper_sve_##name##_d, \
3473 if (a->esz == 0) { \
3476 if (sve_access_check(s)) { \
3477 do_reduce(s, a, fns[a->esz - 1]); \
3482 DO_VPZ(FADDV
, faddv
)
3483 DO_VPZ(FMINNMV
, fminnmv
)
3484 DO_VPZ(FMAXNMV
, fmaxnmv
)
3485 DO_VPZ(FMINV
, fminv
)
3486 DO_VPZ(FMAXV
, fmaxv
)
3489 *** SVE Floating Point Unary Operations - Unpredicated Group
3492 static void do_zz_fp(DisasContext
*s
, arg_rr_esz
*a
, gen_helper_gvec_2_ptr
*fn
)
3494 unsigned vsz
= vec_full_reg_size(s
);
3495 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3497 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s
, a
->rd
),
3498 vec_full_reg_offset(s
, a
->rn
),
3499 status
, vsz
, vsz
, 0, fn
);
3500 tcg_temp_free_ptr(status
);
3503 static bool trans_FRECPE(DisasContext
*s
, arg_rr_esz
*a
)
3505 static gen_helper_gvec_2_ptr
* const fns
[3] = {
3506 gen_helper_gvec_frecpe_h
,
3507 gen_helper_gvec_frecpe_s
,
3508 gen_helper_gvec_frecpe_d
,
3513 if (sve_access_check(s
)) {
3514 do_zz_fp(s
, a
, fns
[a
->esz
- 1]);
3519 static bool trans_FRSQRTE(DisasContext
*s
, arg_rr_esz
*a
)
3521 static gen_helper_gvec_2_ptr
* const fns
[3] = {
3522 gen_helper_gvec_frsqrte_h
,
3523 gen_helper_gvec_frsqrte_s
,
3524 gen_helper_gvec_frsqrte_d
,
3529 if (sve_access_check(s
)) {
3530 do_zz_fp(s
, a
, fns
[a
->esz
- 1]);
3536 *** SVE Floating Point Compare with Zero Group
3539 static void do_ppz_fp(DisasContext
*s
, arg_rpr_esz
*a
,
3540 gen_helper_gvec_3_ptr
*fn
)
3542 unsigned vsz
= vec_full_reg_size(s
);
3543 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3545 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s
, a
->rd
),
3546 vec_full_reg_offset(s
, a
->rn
),
3547 pred_full_reg_offset(s
, a
->pg
),
3548 status
, vsz
, vsz
, 0, fn
);
3549 tcg_temp_free_ptr(status
);
3552 #define DO_PPZ(NAME, name) \
3553 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3555 static gen_helper_gvec_3_ptr * const fns[3] = { \
3556 gen_helper_sve_##name##_h, \
3557 gen_helper_sve_##name##_s, \
3558 gen_helper_sve_##name##_d, \
3560 if (a->esz == 0) { \
3563 if (sve_access_check(s)) { \
3564 do_ppz_fp(s, a, fns[a->esz - 1]); \
3569 DO_PPZ(FCMGE_ppz0
, fcmge0
)
3570 DO_PPZ(FCMGT_ppz0
, fcmgt0
)
3571 DO_PPZ(FCMLE_ppz0
, fcmle0
)
3572 DO_PPZ(FCMLT_ppz0
, fcmlt0
)
3573 DO_PPZ(FCMEQ_ppz0
, fcmeq0
)
3574 DO_PPZ(FCMNE_ppz0
, fcmne0
)
3579 *** SVE floating-point trig multiply-add coefficient
3582 static bool trans_FTMAD(DisasContext
*s
, arg_FTMAD
*a
)
3584 static gen_helper_gvec_3_ptr
* const fns
[3] = {
3585 gen_helper_sve_ftmad_h
,
3586 gen_helper_sve_ftmad_s
,
3587 gen_helper_sve_ftmad_d
,
3593 if (sve_access_check(s
)) {
3594 unsigned vsz
= vec_full_reg_size(s
);
3595 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3596 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3597 vec_full_reg_offset(s
, a
->rn
),
3598 vec_full_reg_offset(s
, a
->rm
),
3599 status
, vsz
, vsz
, a
->imm
, fns
[a
->esz
- 1]);
3600 tcg_temp_free_ptr(status
);
3606 *** SVE Floating Point Accumulating Reduction Group
3609 static bool trans_FADDA(DisasContext
*s
, arg_rprr_esz
*a
)
3611 typedef void fadda_fn(TCGv_i64
, TCGv_i64
, TCGv_ptr
,
3612 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
3613 static fadda_fn
* const fns
[3] = {
3614 gen_helper_sve_fadda_h
,
3615 gen_helper_sve_fadda_s
,
3616 gen_helper_sve_fadda_d
,
3618 unsigned vsz
= vec_full_reg_size(s
);
3619 TCGv_ptr t_rm
, t_pg
, t_fpst
;
3626 if (!sve_access_check(s
)) {
3630 t_val
= load_esz(cpu_env
, vec_reg_offset(s
, a
->rn
, 0, a
->esz
), a
->esz
);
3631 t_rm
= tcg_temp_new_ptr();
3632 t_pg
= tcg_temp_new_ptr();
3633 tcg_gen_addi_ptr(t_rm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
3634 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3635 t_fpst
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3636 t_desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3638 fns
[a
->esz
- 1](t_val
, t_val
, t_rm
, t_pg
, t_fpst
, t_desc
);
3640 tcg_temp_free_i32(t_desc
);
3641 tcg_temp_free_ptr(t_fpst
);
3642 tcg_temp_free_ptr(t_pg
);
3643 tcg_temp_free_ptr(t_rm
);
3645 write_fp_dreg(s
, a
->rd
, t_val
);
3646 tcg_temp_free_i64(t_val
);
3651 *** SVE Floating Point Arithmetic - Unpredicated Group
3654 static bool do_zzz_fp(DisasContext
*s
, arg_rrr_esz
*a
,
3655 gen_helper_gvec_3_ptr
*fn
)
3660 if (sve_access_check(s
)) {
3661 unsigned vsz
= vec_full_reg_size(s
);
3662 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3663 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3664 vec_full_reg_offset(s
, a
->rn
),
3665 vec_full_reg_offset(s
, a
->rm
),
3666 status
, vsz
, vsz
, 0, fn
);
3667 tcg_temp_free_ptr(status
);
3673 #define DO_FP3(NAME, name) \
3674 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
3676 static gen_helper_gvec_3_ptr * const fns[4] = { \
3677 NULL, gen_helper_gvec_##name##_h, \
3678 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3680 return do_zzz_fp(s, a, fns[a->esz]); \
3683 DO_FP3(FADD_zzz
, fadd
)
3684 DO_FP3(FSUB_zzz
, fsub
)
3685 DO_FP3(FMUL_zzz
, fmul
)
3686 DO_FP3(FTSMUL
, ftsmul
)
3687 DO_FP3(FRECPS
, recps
)
3688 DO_FP3(FRSQRTS
, rsqrts
)
3693 *** SVE Floating Point Arithmetic - Predicated Group
3696 static bool do_zpzz_fp(DisasContext
*s
, arg_rprr_esz
*a
,
3697 gen_helper_gvec_4_ptr
*fn
)
3702 if (sve_access_check(s
)) {
3703 unsigned vsz
= vec_full_reg_size(s
);
3704 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3705 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3706 vec_full_reg_offset(s
, a
->rn
),
3707 vec_full_reg_offset(s
, a
->rm
),
3708 pred_full_reg_offset(s
, a
->pg
),
3709 status
, vsz
, vsz
, 0, fn
);
3710 tcg_temp_free_ptr(status
);
3715 #define DO_FP3(NAME, name) \
3716 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
3718 static gen_helper_gvec_4_ptr * const fns[4] = { \
3719 NULL, gen_helper_sve_##name##_h, \
3720 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3722 return do_zpzz_fp(s, a, fns[a->esz]); \
3725 DO_FP3(FADD_zpzz
, fadd
)
3726 DO_FP3(FSUB_zpzz
, fsub
)
3727 DO_FP3(FMUL_zpzz
, fmul
)
3728 DO_FP3(FMIN_zpzz
, fmin
)
3729 DO_FP3(FMAX_zpzz
, fmax
)
3730 DO_FP3(FMINNM_zpzz
, fminnum
)
3731 DO_FP3(FMAXNM_zpzz
, fmaxnum
)
3733 DO_FP3(FSCALE
, fscalbn
)
3735 DO_FP3(FMULX
, fmulx
)
3739 typedef void gen_helper_sve_fp2scalar(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
,
3740 TCGv_i64
, TCGv_ptr
, TCGv_i32
);
3742 static void do_fp_scalar(DisasContext
*s
, int zd
, int zn
, int pg
, bool is_fp16
,
3743 TCGv_i64 scalar
, gen_helper_sve_fp2scalar
*fn
)
3745 unsigned vsz
= vec_full_reg_size(s
);
3746 TCGv_ptr t_zd
, t_zn
, t_pg
, status
;
3749 t_zd
= tcg_temp_new_ptr();
3750 t_zn
= tcg_temp_new_ptr();
3751 t_pg
= tcg_temp_new_ptr();
3752 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, zd
));
3753 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, zn
));
3754 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
3756 status
= fpstatus_ptr(is_fp16
? FPST_FPCR_F16
: FPST_FPCR
);
3757 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3758 fn(t_zd
, t_zn
, t_pg
, scalar
, status
, desc
);
3760 tcg_temp_free_i32(desc
);
3761 tcg_temp_free_ptr(status
);
3762 tcg_temp_free_ptr(t_pg
);
3763 tcg_temp_free_ptr(t_zn
);
3764 tcg_temp_free_ptr(t_zd
);
3767 static void do_fp_imm(DisasContext
*s
, arg_rpri_esz
*a
, uint64_t imm
,
3768 gen_helper_sve_fp2scalar
*fn
)
3770 TCGv_i64 temp
= tcg_const_i64(imm
);
3771 do_fp_scalar(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, temp
, fn
);
3772 tcg_temp_free_i64(temp
);
3775 #define DO_FP_IMM(NAME, name, const0, const1) \
3776 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
3778 static gen_helper_sve_fp2scalar * const fns[3] = { \
3779 gen_helper_sve_##name##_h, \
3780 gen_helper_sve_##name##_s, \
3781 gen_helper_sve_##name##_d \
3783 static uint64_t const val[3][2] = { \
3784 { float16_##const0, float16_##const1 }, \
3785 { float32_##const0, float32_##const1 }, \
3786 { float64_##const0, float64_##const1 }, \
3788 if (a->esz == 0) { \
3791 if (sve_access_check(s)) { \
3792 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3797 DO_FP_IMM(FADD
, fadds
, half
, one
)
3798 DO_FP_IMM(FSUB
, fsubs
, half
, one
)
3799 DO_FP_IMM(FMUL
, fmuls
, half
, two
)
3800 DO_FP_IMM(FSUBR
, fsubrs
, half
, one
)
3801 DO_FP_IMM(FMAXNM
, fmaxnms
, zero
, one
)
3802 DO_FP_IMM(FMINNM
, fminnms
, zero
, one
)
3803 DO_FP_IMM(FMAX
, fmaxs
, zero
, one
)
3804 DO_FP_IMM(FMIN
, fmins
, zero
, one
)
3808 static bool do_fp_cmp(DisasContext
*s
, arg_rprr_esz
*a
,
3809 gen_helper_gvec_4_ptr
*fn
)
3814 if (sve_access_check(s
)) {
3815 unsigned vsz
= vec_full_reg_size(s
);
3816 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3817 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s
, a
->rd
),
3818 vec_full_reg_offset(s
, a
->rn
),
3819 vec_full_reg_offset(s
, a
->rm
),
3820 pred_full_reg_offset(s
, a
->pg
),
3821 status
, vsz
, vsz
, 0, fn
);
3822 tcg_temp_free_ptr(status
);
3827 #define DO_FPCMP(NAME, name) \
3828 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
3830 static gen_helper_gvec_4_ptr * const fns[4] = { \
3831 NULL, gen_helper_sve_##name##_h, \
3832 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3834 return do_fp_cmp(s, a, fns[a->esz]); \
3837 DO_FPCMP(FCMGE
, fcmge
)
3838 DO_FPCMP(FCMGT
, fcmgt
)
3839 DO_FPCMP(FCMEQ
, fcmeq
)
3840 DO_FPCMP(FCMNE
, fcmne
)
3841 DO_FPCMP(FCMUO
, fcmuo
)
3842 DO_FPCMP(FACGE
, facge
)
3843 DO_FPCMP(FACGT
, facgt
)
3847 static bool trans_FCADD(DisasContext
*s
, arg_FCADD
*a
)
3849 static gen_helper_gvec_4_ptr
* const fns
[3] = {
3850 gen_helper_sve_fcadd_h
,
3851 gen_helper_sve_fcadd_s
,
3852 gen_helper_sve_fcadd_d
3858 if (sve_access_check(s
)) {
3859 unsigned vsz
= vec_full_reg_size(s
);
3860 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3861 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3862 vec_full_reg_offset(s
, a
->rn
),
3863 vec_full_reg_offset(s
, a
->rm
),
3864 pred_full_reg_offset(s
, a
->pg
),
3865 status
, vsz
, vsz
, a
->rot
, fns
[a
->esz
- 1]);
3866 tcg_temp_free_ptr(status
);
3871 static bool do_fmla(DisasContext
*s
, arg_rprrr_esz
*a
,
3872 gen_helper_gvec_5_ptr
*fn
)
3877 if (sve_access_check(s
)) {
3878 unsigned vsz
= vec_full_reg_size(s
);
3879 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3880 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s
, a
->rd
),
3881 vec_full_reg_offset(s
, a
->rn
),
3882 vec_full_reg_offset(s
, a
->rm
),
3883 vec_full_reg_offset(s
, a
->ra
),
3884 pred_full_reg_offset(s
, a
->pg
),
3885 status
, vsz
, vsz
, 0, fn
);
3886 tcg_temp_free_ptr(status
);
3891 #define DO_FMLA(NAME, name) \
3892 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
3894 static gen_helper_gvec_5_ptr * const fns[4] = { \
3895 NULL, gen_helper_sve_##name##_h, \
3896 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3898 return do_fmla(s, a, fns[a->esz]); \
3901 DO_FMLA(FMLA_zpzzz
, fmla_zpzzz
)
3902 DO_FMLA(FMLS_zpzzz
, fmls_zpzzz
)
3903 DO_FMLA(FNMLA_zpzzz
, fnmla_zpzzz
)
3904 DO_FMLA(FNMLS_zpzzz
, fnmls_zpzzz
)
3908 static bool trans_FCMLA_zpzzz(DisasContext
*s
, arg_FCMLA_zpzzz
*a
)
3910 static gen_helper_gvec_5_ptr
* const fns
[4] = {
3912 gen_helper_sve_fcmla_zpzzz_h
,
3913 gen_helper_sve_fcmla_zpzzz_s
,
3914 gen_helper_sve_fcmla_zpzzz_d
,
3920 if (sve_access_check(s
)) {
3921 unsigned vsz
= vec_full_reg_size(s
);
3922 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3923 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s
, a
->rd
),
3924 vec_full_reg_offset(s
, a
->rn
),
3925 vec_full_reg_offset(s
, a
->rm
),
3926 vec_full_reg_offset(s
, a
->ra
),
3927 pred_full_reg_offset(s
, a
->pg
),
3928 status
, vsz
, vsz
, a
->rot
, fns
[a
->esz
]);
3929 tcg_temp_free_ptr(status
);
3934 static bool trans_FCMLA_zzxz(DisasContext
*s
, arg_FCMLA_zzxz
*a
)
3936 static gen_helper_gvec_3_ptr
* const fns
[2] = {
3937 gen_helper_gvec_fcmlah_idx
,
3938 gen_helper_gvec_fcmlas_idx
,
3941 tcg_debug_assert(a
->esz
== 1 || a
->esz
== 2);
3942 tcg_debug_assert(a
->rd
== a
->ra
);
3943 if (sve_access_check(s
)) {
3944 unsigned vsz
= vec_full_reg_size(s
);
3945 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3946 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3947 vec_full_reg_offset(s
, a
->rn
),
3948 vec_full_reg_offset(s
, a
->rm
),
3950 a
->index
* 4 + a
->rot
,
3952 tcg_temp_free_ptr(status
);
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */
3961 static bool do_zpz_ptr(DisasContext
*s
, int rd
, int rn
, int pg
,
3962 bool is_fp16
, gen_helper_gvec_3_ptr
*fn
)
3964 if (sve_access_check(s
)) {
3965 unsigned vsz
= vec_full_reg_size(s
);
3966 TCGv_ptr status
= fpstatus_ptr(is_fp16
? FPST_FPCR_F16
: FPST_FPCR
);
3967 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, rd
),
3968 vec_full_reg_offset(s
, rn
),
3969 pred_full_reg_offset(s
, pg
),
3970 status
, vsz
, vsz
, 0, fn
);
3971 tcg_temp_free_ptr(status
);
3976 static bool trans_FCVT_sh(DisasContext
*s
, arg_rpr_esz
*a
)
3978 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_sh
);
3981 static bool trans_FCVT_hs(DisasContext
*s
, arg_rpr_esz
*a
)
3983 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_hs
);
3986 static bool trans_FCVT_dh(DisasContext
*s
, arg_rpr_esz
*a
)
3988 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_dh
);
3991 static bool trans_FCVT_hd(DisasContext
*s
, arg_rpr_esz
*a
)
3993 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_hd
);
3996 static bool trans_FCVT_ds(DisasContext
*s
, arg_rpr_esz
*a
)
3998 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_ds
);
4001 static bool trans_FCVT_sd(DisasContext
*s
, arg_rpr_esz
*a
)
4003 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_sd
);
4006 static bool trans_FCVTZS_hh(DisasContext
*s
, arg_rpr_esz
*a
)
4008 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzs_hh
);
4011 static bool trans_FCVTZU_hh(DisasContext
*s
, arg_rpr_esz
*a
)
4013 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzu_hh
);
4016 static bool trans_FCVTZS_hs(DisasContext
*s
, arg_rpr_esz
*a
)
4018 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzs_hs
);
4021 static bool trans_FCVTZU_hs(DisasContext
*s
, arg_rpr_esz
*a
)
4023 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzu_hs
);
4026 static bool trans_FCVTZS_hd(DisasContext
*s
, arg_rpr_esz
*a
)
4028 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzs_hd
);
4031 static bool trans_FCVTZU_hd(DisasContext
*s
, arg_rpr_esz
*a
)
4033 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzu_hd
);
4036 static bool trans_FCVTZS_ss(DisasContext
*s
, arg_rpr_esz
*a
)
4038 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_ss
);
4041 static bool trans_FCVTZU_ss(DisasContext
*s
, arg_rpr_esz
*a
)
4043 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_ss
);
4046 static bool trans_FCVTZS_sd(DisasContext
*s
, arg_rpr_esz
*a
)
4048 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_sd
);
4051 static bool trans_FCVTZU_sd(DisasContext
*s
, arg_rpr_esz
*a
)
4053 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_sd
);
4056 static bool trans_FCVTZS_ds(DisasContext
*s
, arg_rpr_esz
*a
)
4058 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_ds
);
4061 static bool trans_FCVTZU_ds(DisasContext
*s
, arg_rpr_esz
*a
)
4063 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_ds
);
4066 static bool trans_FCVTZS_dd(DisasContext
*s
, arg_rpr_esz
*a
)
4068 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_dd
);
4071 static bool trans_FCVTZU_dd(DisasContext
*s
, arg_rpr_esz
*a
)
4073 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_dd
);
4076 static gen_helper_gvec_3_ptr
* const frint_fns
[3] = {
4077 gen_helper_sve_frint_h
,
4078 gen_helper_sve_frint_s
,
4079 gen_helper_sve_frint_d
4082 static bool trans_FRINTI(DisasContext
*s
, arg_rpr_esz
*a
)
4087 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
,
4088 frint_fns
[a
->esz
- 1]);
4091 static bool trans_FRINTX(DisasContext
*s
, arg_rpr_esz
*a
)
4093 static gen_helper_gvec_3_ptr
* const fns
[3] = {
4094 gen_helper_sve_frintx_h
,
4095 gen_helper_sve_frintx_s
,
4096 gen_helper_sve_frintx_d
4101 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, fns
[a
->esz
- 1]);
4104 static bool do_frint_mode(DisasContext
*s
, arg_rpr_esz
*a
, int mode
)
4109 if (sve_access_check(s
)) {
4110 unsigned vsz
= vec_full_reg_size(s
);
4111 TCGv_i32 tmode
= tcg_const_i32(mode
);
4112 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
4114 gen_helper_set_rmode(tmode
, tmode
, status
);
4116 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
4117 vec_full_reg_offset(s
, a
->rn
),
4118 pred_full_reg_offset(s
, a
->pg
),
4119 status
, vsz
, vsz
, 0, frint_fns
[a
->esz
- 1]);
4121 gen_helper_set_rmode(tmode
, tmode
, status
);
4122 tcg_temp_free_i32(tmode
);
4123 tcg_temp_free_ptr(status
);
4128 static bool trans_FRINTN(DisasContext
*s
, arg_rpr_esz
*a
)
4130 return do_frint_mode(s
, a
, float_round_nearest_even
);
4133 static bool trans_FRINTP(DisasContext
*s
, arg_rpr_esz
*a
)
4135 return do_frint_mode(s
, a
, float_round_up
);
4138 static bool trans_FRINTM(DisasContext
*s
, arg_rpr_esz
*a
)
4140 return do_frint_mode(s
, a
, float_round_down
);
4143 static bool trans_FRINTZ(DisasContext
*s
, arg_rpr_esz
*a
)
4145 return do_frint_mode(s
, a
, float_round_to_zero
);
4148 static bool trans_FRINTA(DisasContext
*s
, arg_rpr_esz
*a
)
4150 return do_frint_mode(s
, a
, float_round_ties_away
);
4153 static bool trans_FRECPX(DisasContext
*s
, arg_rpr_esz
*a
)
4155 static gen_helper_gvec_3_ptr
* const fns
[3] = {
4156 gen_helper_sve_frecpx_h
,
4157 gen_helper_sve_frecpx_s
,
4158 gen_helper_sve_frecpx_d
4163 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, fns
[a
->esz
- 1]);
4166 static bool trans_FSQRT(DisasContext
*s
, arg_rpr_esz
*a
)
4168 static gen_helper_gvec_3_ptr
* const fns
[3] = {
4169 gen_helper_sve_fsqrt_h
,
4170 gen_helper_sve_fsqrt_s
,
4171 gen_helper_sve_fsqrt_d
4176 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, fns
[a
->esz
- 1]);
4179 static bool trans_SCVTF_hh(DisasContext
*s
, arg_rpr_esz
*a
)
4181 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_hh
);
4184 static bool trans_SCVTF_sh(DisasContext
*s
, arg_rpr_esz
*a
)
4186 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_sh
);
4189 static bool trans_SCVTF_dh(DisasContext
*s
, arg_rpr_esz
*a
)
4191 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_dh
);
4194 static bool trans_SCVTF_ss(DisasContext
*s
, arg_rpr_esz
*a
)
4196 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_ss
);
4199 static bool trans_SCVTF_ds(DisasContext
*s
, arg_rpr_esz
*a
)
4201 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_ds
);
4204 static bool trans_SCVTF_sd(DisasContext
*s
, arg_rpr_esz
*a
)
4206 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_sd
);
4209 static bool trans_SCVTF_dd(DisasContext
*s
, arg_rpr_esz
*a
)
4211 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_dd
);
4214 static bool trans_UCVTF_hh(DisasContext
*s
, arg_rpr_esz
*a
)
4216 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_hh
);
4219 static bool trans_UCVTF_sh(DisasContext
*s
, arg_rpr_esz
*a
)
4221 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_sh
);
4224 static bool trans_UCVTF_dh(DisasContext
*s
, arg_rpr_esz
*a
)
4226 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_dh
);
4229 static bool trans_UCVTF_ss(DisasContext
*s
, arg_rpr_esz
*a
)
4231 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_ss
);
4234 static bool trans_UCVTF_ds(DisasContext
*s
, arg_rpr_esz
*a
)
4236 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_ds
);
4239 static bool trans_UCVTF_sd(DisasContext
*s
, arg_rpr_esz
*a
)
4241 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_sd
);
4244 static bool trans_UCVTF_dd(DisasContext
*s
, arg_rpr_esz
*a
)
4246 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_dd
);
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
4253 /* Subroutine loading a vector register at VOFS of LEN bytes.
4254 * The load should begin at the address Rn + IMM.
4257 static void do_ldr(DisasContext
*s
, uint32_t vofs
, int len
, int rn
, int imm
)
4259 int len_align
= QEMU_ALIGN_DOWN(len
, 8);
4260 int len_remain
= len
% 8;
4261 int nparts
= len
/ 8 + ctpop8(len_remain
);
4262 int midx
= get_mem_index(s
);
4263 TCGv_i64 dirty_addr
, clean_addr
, t0
, t1
;
4265 dirty_addr
= tcg_temp_new_i64();
4266 tcg_gen_addi_i64(dirty_addr
, cpu_reg_sp(s
, rn
), imm
);
4267 clean_addr
= gen_mte_checkN(s
, dirty_addr
, false, rn
!= 31, len
, MO_8
);
4268 tcg_temp_free_i64(dirty_addr
);
4271 * Note that unpredicated load/store of vector/predicate registers
4272 * are defined as a stream of bytes, which equates to little-endian
4273 * operations on larger quantities.
4274 * Attempt to keep code expansion to a minimum by limiting the
4275 * amount of unrolling done.
4280 t0
= tcg_temp_new_i64();
4281 for (i
= 0; i
< len_align
; i
+= 8) {
4282 tcg_gen_qemu_ld_i64(t0
, clean_addr
, midx
, MO_LEQ
);
4283 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ i
);
4284 tcg_gen_addi_i64(clean_addr
, clean_addr
, 8);
4286 tcg_temp_free_i64(t0
);
4288 TCGLabel
*loop
= gen_new_label();
4289 TCGv_ptr tp
, i
= tcg_const_local_ptr(0);
4291 /* Copy the clean address into a local temp, live across the loop. */
4293 clean_addr
= new_tmp_a64_local(s
);
4294 tcg_gen_mov_i64(clean_addr
, t0
);
4296 gen_set_label(loop
);
4298 t0
= tcg_temp_new_i64();
4299 tcg_gen_qemu_ld_i64(t0
, clean_addr
, midx
, MO_LEQ
);
4300 tcg_gen_addi_i64(clean_addr
, clean_addr
, 8);
4302 tp
= tcg_temp_new_ptr();
4303 tcg_gen_add_ptr(tp
, cpu_env
, i
);
4304 tcg_gen_addi_ptr(i
, i
, 8);
4305 tcg_gen_st_i64(t0
, tp
, vofs
);
4306 tcg_temp_free_ptr(tp
);
4307 tcg_temp_free_i64(t0
);
4309 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
4310 tcg_temp_free_ptr(i
);
4314 * Predicate register loads can be any multiple of 2.
4315 * Note that we still store the entire 64-bit unit into cpu_env.
4318 t0
= tcg_temp_new_i64();
4319 switch (len_remain
) {
4323 tcg_gen_qemu_ld_i64(t0
, clean_addr
, midx
,
4324 MO_LE
| ctz32(len_remain
));
4328 t1
= tcg_temp_new_i64();
4329 tcg_gen_qemu_ld_i64(t0
, clean_addr
, midx
, MO_LEUL
);
4330 tcg_gen_addi_i64(clean_addr
, clean_addr
, 4);
4331 tcg_gen_qemu_ld_i64(t1
, clean_addr
, midx
, MO_LEUW
);
4332 tcg_gen_deposit_i64(t0
, t0
, t1
, 32, 32);
4333 tcg_temp_free_i64(t1
);
4337 g_assert_not_reached();
4339 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ len_align
);
4340 tcg_temp_free_i64(t0
);
4344 /* Similarly for stores. */
4345 static void do_str(DisasContext
*s
, uint32_t vofs
, int len
, int rn
, int imm
)
4347 int len_align
= QEMU_ALIGN_DOWN(len
, 8);
4348 int len_remain
= len
% 8;
4349 int nparts
= len
/ 8 + ctpop8(len_remain
);
4350 int midx
= get_mem_index(s
);
4351 TCGv_i64 dirty_addr
, clean_addr
, t0
;
4353 dirty_addr
= tcg_temp_new_i64();
4354 tcg_gen_addi_i64(dirty_addr
, cpu_reg_sp(s
, rn
), imm
);
4355 clean_addr
= gen_mte_checkN(s
, dirty_addr
, false, rn
!= 31, len
, MO_8
);
4356 tcg_temp_free_i64(dirty_addr
);
4358 /* Note that unpredicated load/store of vector/predicate registers
4359 * are defined as a stream of bytes, which equates to little-endian
4360 * operations on larger quantities. There is no nice way to force
4361 * a little-endian store for aarch64_be-linux-user out of line.
4363 * Attempt to keep code expansion to a minimum by limiting the
4364 * amount of unrolling done.
4369 t0
= tcg_temp_new_i64();
4370 for (i
= 0; i
< len_align
; i
+= 8) {
4371 tcg_gen_ld_i64(t0
, cpu_env
, vofs
+ i
);
4372 tcg_gen_qemu_st_i64(t0
, clean_addr
, midx
, MO_LEQ
);
4373 tcg_gen_addi_i64(clean_addr
, clean_addr
, 8);
4375 tcg_temp_free_i64(t0
);
4377 TCGLabel
*loop
= gen_new_label();
4378 TCGv_ptr tp
, i
= tcg_const_local_ptr(0);
4380 /* Copy the clean address into a local temp, live across the loop. */
4382 clean_addr
= new_tmp_a64_local(s
);
4383 tcg_gen_mov_i64(clean_addr
, t0
);
4385 gen_set_label(loop
);
4387 t0
= tcg_temp_new_i64();
4388 tp
= tcg_temp_new_ptr();
4389 tcg_gen_add_ptr(tp
, cpu_env
, i
);
4390 tcg_gen_ld_i64(t0
, tp
, vofs
);
4391 tcg_gen_addi_ptr(i
, i
, 8);
4392 tcg_temp_free_ptr(tp
);
4394 tcg_gen_qemu_st_i64(t0
, clean_addr
, midx
, MO_LEQ
);
4395 tcg_gen_addi_i64(clean_addr
, clean_addr
, 8);
4396 tcg_temp_free_i64(t0
);
4398 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
4399 tcg_temp_free_ptr(i
);
4402 /* Predicate register stores can be any multiple of 2. */
4404 t0
= tcg_temp_new_i64();
4405 tcg_gen_ld_i64(t0
, cpu_env
, vofs
+ len_align
);
4407 switch (len_remain
) {
4411 tcg_gen_qemu_st_i64(t0
, clean_addr
, midx
,
4412 MO_LE
| ctz32(len_remain
));
4416 tcg_gen_qemu_st_i64(t0
, clean_addr
, midx
, MO_LEUL
);
4417 tcg_gen_addi_i64(clean_addr
, clean_addr
, 4);
4418 tcg_gen_shri_i64(t0
, t0
, 32);
4419 tcg_gen_qemu_st_i64(t0
, clean_addr
, midx
, MO_LEUW
);
4423 g_assert_not_reached();
4425 tcg_temp_free_i64(t0
);
4429 static bool trans_LDR_zri(DisasContext
*s
, arg_rri
*a
)
4431 if (sve_access_check(s
)) {
4432 int size
= vec_full_reg_size(s
);
4433 int off
= vec_full_reg_offset(s
, a
->rd
);
4434 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
4439 static bool trans_LDR_pri(DisasContext
*s
, arg_rri
*a
)
4441 if (sve_access_check(s
)) {
4442 int size
= pred_full_reg_size(s
);
4443 int off
= pred_full_reg_offset(s
, a
->rd
);
4444 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
4449 static bool trans_STR_zri(DisasContext
*s
, arg_rri
*a
)
4451 if (sve_access_check(s
)) {
4452 int size
= vec_full_reg_size(s
);
4453 int off
= vec_full_reg_offset(s
, a
->rd
);
4454 do_str(s
, off
, size
, a
->rn
, a
->imm
* size
);
4459 static bool trans_STR_pri(DisasContext
*s
, arg_rri
*a
)
4461 if (sve_access_check(s
)) {
4462 int size
= pred_full_reg_size(s
);
4463 int off
= pred_full_reg_offset(s
, a
->rd
);
4464 do_str(s
, off
, size
, a
->rn
, a
->imm
* size
);
/*
 *** SVE Memory - Contiguous Load Group
 */
4473 /* The memory mode of the dtype. */
4474 static const MemOp dtype_mop
[16] = {
4475 MO_UB
, MO_UB
, MO_UB
, MO_UB
,
4476 MO_SL
, MO_UW
, MO_UW
, MO_UW
,
4477 MO_SW
, MO_SW
, MO_UL
, MO_UL
,
4478 MO_SB
, MO_SB
, MO_SB
, MO_Q
4481 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4483 /* The vector element size of dtype. */
4484 static const uint8_t dtype_esz
[16] = {
4491 static void do_mem_zpa(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
,
4492 int dtype
, uint32_t mte_n
, bool is_write
,
4493 gen_helper_gvec_mem
*fn
)
4495 unsigned vsz
= vec_full_reg_size(s
);
4501 * For e.g. LD4, there are not enough arguments to pass all 4
4502 * registers as pointers, so encode the regno into the data field.
4503 * For consistency, do this even for LD1.
4505 if (s
->mte_active
[0]) {
4506 int msz
= dtype_msz(dtype
);
4508 desc
= FIELD_DP32(desc
, MTEDESC
, MIDX
, get_mem_index(s
));
4509 desc
= FIELD_DP32(desc
, MTEDESC
, TBI
, s
->tbid
);
4510 desc
= FIELD_DP32(desc
, MTEDESC
, TCMA
, s
->tcma
);
4511 desc
= FIELD_DP32(desc
, MTEDESC
, WRITE
, is_write
);
4512 desc
= FIELD_DP32(desc
, MTEDESC
, ESIZE
, 1 << msz
);
4513 desc
= FIELD_DP32(desc
, MTEDESC
, TSIZE
, mte_n
<< msz
);
4514 desc
<<= SVE_MTEDESC_SHIFT
;
4516 addr
= clean_data_tbi(s
, addr
);
4519 desc
= simd_desc(vsz
, vsz
, zt
| desc
);
4520 t_desc
= tcg_const_i32(desc
);
4521 t_pg
= tcg_temp_new_ptr();
4523 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4524 fn(cpu_env
, t_pg
, addr
, t_desc
);
4526 tcg_temp_free_ptr(t_pg
);
4527 tcg_temp_free_i32(t_desc
);
4530 static void do_ld_zpa(DisasContext
*s
, int zt
, int pg
,
4531 TCGv_i64 addr
, int dtype
, int nreg
)
4533 static gen_helper_gvec_mem
* const fns
[2][2][16][4] = {
4534 { /* mte inactive, little-endian */
4535 { { gen_helper_sve_ld1bb_r
, gen_helper_sve_ld2bb_r
,
4536 gen_helper_sve_ld3bb_r
, gen_helper_sve_ld4bb_r
},
4537 { gen_helper_sve_ld1bhu_r
, NULL
, NULL
, NULL
},
4538 { gen_helper_sve_ld1bsu_r
, NULL
, NULL
, NULL
},
4539 { gen_helper_sve_ld1bdu_r
, NULL
, NULL
, NULL
},
4541 { gen_helper_sve_ld1sds_le_r
, NULL
, NULL
, NULL
},
4542 { gen_helper_sve_ld1hh_le_r
, gen_helper_sve_ld2hh_le_r
,
4543 gen_helper_sve_ld3hh_le_r
, gen_helper_sve_ld4hh_le_r
},
4544 { gen_helper_sve_ld1hsu_le_r
, NULL
, NULL
, NULL
},
4545 { gen_helper_sve_ld1hdu_le_r
, NULL
, NULL
, NULL
},
4547 { gen_helper_sve_ld1hds_le_r
, NULL
, NULL
, NULL
},
4548 { gen_helper_sve_ld1hss_le_r
, NULL
, NULL
, NULL
},
4549 { gen_helper_sve_ld1ss_le_r
, gen_helper_sve_ld2ss_le_r
,
4550 gen_helper_sve_ld3ss_le_r
, gen_helper_sve_ld4ss_le_r
},
4551 { gen_helper_sve_ld1sdu_le_r
, NULL
, NULL
, NULL
},
4553 { gen_helper_sve_ld1bds_r
, NULL
, NULL
, NULL
},
4554 { gen_helper_sve_ld1bss_r
, NULL
, NULL
, NULL
},
4555 { gen_helper_sve_ld1bhs_r
, NULL
, NULL
, NULL
},
4556 { gen_helper_sve_ld1dd_le_r
, gen_helper_sve_ld2dd_le_r
,
4557 gen_helper_sve_ld3dd_le_r
, gen_helper_sve_ld4dd_le_r
} },
4559 /* mte inactive, big-endian */
4560 { { gen_helper_sve_ld1bb_r
, gen_helper_sve_ld2bb_r
,
4561 gen_helper_sve_ld3bb_r
, gen_helper_sve_ld4bb_r
},
4562 { gen_helper_sve_ld1bhu_r
, NULL
, NULL
, NULL
},
4563 { gen_helper_sve_ld1bsu_r
, NULL
, NULL
, NULL
},
4564 { gen_helper_sve_ld1bdu_r
, NULL
, NULL
, NULL
},
4566 { gen_helper_sve_ld1sds_be_r
, NULL
, NULL
, NULL
},
4567 { gen_helper_sve_ld1hh_be_r
, gen_helper_sve_ld2hh_be_r
,
4568 gen_helper_sve_ld3hh_be_r
, gen_helper_sve_ld4hh_be_r
},
4569 { gen_helper_sve_ld1hsu_be_r
, NULL
, NULL
, NULL
},
4570 { gen_helper_sve_ld1hdu_be_r
, NULL
, NULL
, NULL
},
4572 { gen_helper_sve_ld1hds_be_r
, NULL
, NULL
, NULL
},
4573 { gen_helper_sve_ld1hss_be_r
, NULL
, NULL
, NULL
},
4574 { gen_helper_sve_ld1ss_be_r
, gen_helper_sve_ld2ss_be_r
,
4575 gen_helper_sve_ld3ss_be_r
, gen_helper_sve_ld4ss_be_r
},
4576 { gen_helper_sve_ld1sdu_be_r
, NULL
, NULL
, NULL
},
4578 { gen_helper_sve_ld1bds_r
, NULL
, NULL
, NULL
},
4579 { gen_helper_sve_ld1bss_r
, NULL
, NULL
, NULL
},
4580 { gen_helper_sve_ld1bhs_r
, NULL
, NULL
, NULL
},
4581 { gen_helper_sve_ld1dd_be_r
, gen_helper_sve_ld2dd_be_r
,
4582 gen_helper_sve_ld3dd_be_r
, gen_helper_sve_ld4dd_be_r
} } },
4584 { /* mte active, little-endian */
4585 { { gen_helper_sve_ld1bb_r_mte
,
4586 gen_helper_sve_ld2bb_r_mte
,
4587 gen_helper_sve_ld3bb_r_mte
,
4588 gen_helper_sve_ld4bb_r_mte
},
4589 { gen_helper_sve_ld1bhu_r_mte
, NULL
, NULL
, NULL
},
4590 { gen_helper_sve_ld1bsu_r_mte
, NULL
, NULL
, NULL
},
4591 { gen_helper_sve_ld1bdu_r_mte
, NULL
, NULL
, NULL
},
4593 { gen_helper_sve_ld1sds_le_r_mte
, NULL
, NULL
, NULL
},
4594 { gen_helper_sve_ld1hh_le_r_mte
,
4595 gen_helper_sve_ld2hh_le_r_mte
,
4596 gen_helper_sve_ld3hh_le_r_mte
,
4597 gen_helper_sve_ld4hh_le_r_mte
},
4598 { gen_helper_sve_ld1hsu_le_r_mte
, NULL
, NULL
, NULL
},
4599 { gen_helper_sve_ld1hdu_le_r_mte
, NULL
, NULL
, NULL
},
4601 { gen_helper_sve_ld1hds_le_r_mte
, NULL
, NULL
, NULL
},
4602 { gen_helper_sve_ld1hss_le_r_mte
, NULL
, NULL
, NULL
},
4603 { gen_helper_sve_ld1ss_le_r_mte
,
4604 gen_helper_sve_ld2ss_le_r_mte
,
4605 gen_helper_sve_ld3ss_le_r_mte
,
4606 gen_helper_sve_ld4ss_le_r_mte
},
4607 { gen_helper_sve_ld1sdu_le_r_mte
, NULL
, NULL
, NULL
},
4609 { gen_helper_sve_ld1bds_r_mte
, NULL
, NULL
, NULL
},
4610 { gen_helper_sve_ld1bss_r_mte
, NULL
, NULL
, NULL
},
4611 { gen_helper_sve_ld1bhs_r_mte
, NULL
, NULL
, NULL
},
4612 { gen_helper_sve_ld1dd_le_r_mte
,
4613 gen_helper_sve_ld2dd_le_r_mte
,
4614 gen_helper_sve_ld3dd_le_r_mte
,
4615 gen_helper_sve_ld4dd_le_r_mte
} },
4617 /* mte active, big-endian */
4618 { { gen_helper_sve_ld1bb_r_mte
,
4619 gen_helper_sve_ld2bb_r_mte
,
4620 gen_helper_sve_ld3bb_r_mte
,
4621 gen_helper_sve_ld4bb_r_mte
},
4622 { gen_helper_sve_ld1bhu_r_mte
, NULL
, NULL
, NULL
},
4623 { gen_helper_sve_ld1bsu_r_mte
, NULL
, NULL
, NULL
},
4624 { gen_helper_sve_ld1bdu_r_mte
, NULL
, NULL
, NULL
},
4626 { gen_helper_sve_ld1sds_be_r_mte
, NULL
, NULL
, NULL
},
4627 { gen_helper_sve_ld1hh_be_r_mte
,
4628 gen_helper_sve_ld2hh_be_r_mte
,
4629 gen_helper_sve_ld3hh_be_r_mte
,
4630 gen_helper_sve_ld4hh_be_r_mte
},
4631 { gen_helper_sve_ld1hsu_be_r_mte
, NULL
, NULL
, NULL
},
4632 { gen_helper_sve_ld1hdu_be_r_mte
, NULL
, NULL
, NULL
},
4634 { gen_helper_sve_ld1hds_be_r_mte
, NULL
, NULL
, NULL
},
4635 { gen_helper_sve_ld1hss_be_r_mte
, NULL
, NULL
, NULL
},
4636 { gen_helper_sve_ld1ss_be_r_mte
,
4637 gen_helper_sve_ld2ss_be_r_mte
,
4638 gen_helper_sve_ld3ss_be_r_mte
,
4639 gen_helper_sve_ld4ss_be_r_mte
},
4640 { gen_helper_sve_ld1sdu_be_r_mte
, NULL
, NULL
, NULL
},
4642 { gen_helper_sve_ld1bds_r_mte
, NULL
, NULL
, NULL
},
4643 { gen_helper_sve_ld1bss_r_mte
, NULL
, NULL
, NULL
},
4644 { gen_helper_sve_ld1bhs_r_mte
, NULL
, NULL
, NULL
},
4645 { gen_helper_sve_ld1dd_be_r_mte
,
4646 gen_helper_sve_ld2dd_be_r_mte
,
4647 gen_helper_sve_ld3dd_be_r_mte
,
4648 gen_helper_sve_ld4dd_be_r_mte
} } },
4650 gen_helper_gvec_mem
*fn
4651 = fns
[s
->mte_active
[0]][s
->be_data
== MO_BE
][dtype
][nreg
];
4654 * While there are holes in the table, they are not
4655 * accessible via the instruction encoding.
4658 do_mem_zpa(s
, zt
, pg
, addr
, dtype
, nreg
, false, fn
);
4661 static bool trans_LD_zprr(DisasContext
*s
, arg_rprr_load
*a
)
4666 if (sve_access_check(s
)) {
4667 TCGv_i64 addr
= new_tmp_a64(s
);
4668 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), dtype_msz(a
->dtype
));
4669 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4670 do_ld_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, a
->nreg
);
4675 static bool trans_LD_zpri(DisasContext
*s
, arg_rpri_load
*a
)
4677 if (sve_access_check(s
)) {
4678 int vsz
= vec_full_reg_size(s
);
4679 int elements
= vsz
>> dtype_esz
[a
->dtype
];
4680 TCGv_i64 addr
= new_tmp_a64(s
);
4682 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
),
4683 (a
->imm
* elements
* (a
->nreg
+ 1))
4684 << dtype_msz(a
->dtype
));
4685 do_ld_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, a
->nreg
);
4690 static bool trans_LDFF1_zprr(DisasContext
*s
, arg_rprr_load
*a
)
4692 static gen_helper_gvec_mem
* const fns
[2][2][16] = {
4693 { /* mte inactive, little-endian */
4694 { gen_helper_sve_ldff1bb_r
,
4695 gen_helper_sve_ldff1bhu_r
,
4696 gen_helper_sve_ldff1bsu_r
,
4697 gen_helper_sve_ldff1bdu_r
,
4699 gen_helper_sve_ldff1sds_le_r
,
4700 gen_helper_sve_ldff1hh_le_r
,
4701 gen_helper_sve_ldff1hsu_le_r
,
4702 gen_helper_sve_ldff1hdu_le_r
,
4704 gen_helper_sve_ldff1hds_le_r
,
4705 gen_helper_sve_ldff1hss_le_r
,
4706 gen_helper_sve_ldff1ss_le_r
,
4707 gen_helper_sve_ldff1sdu_le_r
,
4709 gen_helper_sve_ldff1bds_r
,
4710 gen_helper_sve_ldff1bss_r
,
4711 gen_helper_sve_ldff1bhs_r
,
4712 gen_helper_sve_ldff1dd_le_r
},
4714 /* mte inactive, big-endian */
4715 { gen_helper_sve_ldff1bb_r
,
4716 gen_helper_sve_ldff1bhu_r
,
4717 gen_helper_sve_ldff1bsu_r
,
4718 gen_helper_sve_ldff1bdu_r
,
4720 gen_helper_sve_ldff1sds_be_r
,
4721 gen_helper_sve_ldff1hh_be_r
,
4722 gen_helper_sve_ldff1hsu_be_r
,
4723 gen_helper_sve_ldff1hdu_be_r
,
4725 gen_helper_sve_ldff1hds_be_r
,
4726 gen_helper_sve_ldff1hss_be_r
,
4727 gen_helper_sve_ldff1ss_be_r
,
4728 gen_helper_sve_ldff1sdu_be_r
,
4730 gen_helper_sve_ldff1bds_r
,
4731 gen_helper_sve_ldff1bss_r
,
4732 gen_helper_sve_ldff1bhs_r
,
4733 gen_helper_sve_ldff1dd_be_r
} },
4735 { /* mte active, little-endian */
4736 { gen_helper_sve_ldff1bb_r_mte
,
4737 gen_helper_sve_ldff1bhu_r_mte
,
4738 gen_helper_sve_ldff1bsu_r_mte
,
4739 gen_helper_sve_ldff1bdu_r_mte
,
4741 gen_helper_sve_ldff1sds_le_r_mte
,
4742 gen_helper_sve_ldff1hh_le_r_mte
,
4743 gen_helper_sve_ldff1hsu_le_r_mte
,
4744 gen_helper_sve_ldff1hdu_le_r_mte
,
4746 gen_helper_sve_ldff1hds_le_r_mte
,
4747 gen_helper_sve_ldff1hss_le_r_mte
,
4748 gen_helper_sve_ldff1ss_le_r_mte
,
4749 gen_helper_sve_ldff1sdu_le_r_mte
,
4751 gen_helper_sve_ldff1bds_r_mte
,
4752 gen_helper_sve_ldff1bss_r_mte
,
4753 gen_helper_sve_ldff1bhs_r_mte
,
4754 gen_helper_sve_ldff1dd_le_r_mte
},
4756 /* mte active, big-endian */
4757 { gen_helper_sve_ldff1bb_r_mte
,
4758 gen_helper_sve_ldff1bhu_r_mte
,
4759 gen_helper_sve_ldff1bsu_r_mte
,
4760 gen_helper_sve_ldff1bdu_r_mte
,
4762 gen_helper_sve_ldff1sds_be_r_mte
,
4763 gen_helper_sve_ldff1hh_be_r_mte
,
4764 gen_helper_sve_ldff1hsu_be_r_mte
,
4765 gen_helper_sve_ldff1hdu_be_r_mte
,
4767 gen_helper_sve_ldff1hds_be_r_mte
,
4768 gen_helper_sve_ldff1hss_be_r_mte
,
4769 gen_helper_sve_ldff1ss_be_r_mte
,
4770 gen_helper_sve_ldff1sdu_be_r_mte
,
4772 gen_helper_sve_ldff1bds_r_mte
,
4773 gen_helper_sve_ldff1bss_r_mte
,
4774 gen_helper_sve_ldff1bhs_r_mte
,
4775 gen_helper_sve_ldff1dd_be_r_mte
} },
4778 if (sve_access_check(s
)) {
4779 TCGv_i64 addr
= new_tmp_a64(s
);
4780 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), dtype_msz(a
->dtype
));
4781 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4782 do_mem_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, 1, false,
4783 fns
[s
->mte_active
[0]][s
->be_data
== MO_BE
][a
->dtype
]);
4788 static bool trans_LDNF1_zpri(DisasContext
*s
, arg_rpri_load
*a
)
4790 static gen_helper_gvec_mem
* const fns
[2][2][16] = {
4791 { /* mte inactive, little-endian */
4792 { gen_helper_sve_ldnf1bb_r
,
4793 gen_helper_sve_ldnf1bhu_r
,
4794 gen_helper_sve_ldnf1bsu_r
,
4795 gen_helper_sve_ldnf1bdu_r
,
4797 gen_helper_sve_ldnf1sds_le_r
,
4798 gen_helper_sve_ldnf1hh_le_r
,
4799 gen_helper_sve_ldnf1hsu_le_r
,
4800 gen_helper_sve_ldnf1hdu_le_r
,
4802 gen_helper_sve_ldnf1hds_le_r
,
4803 gen_helper_sve_ldnf1hss_le_r
,
4804 gen_helper_sve_ldnf1ss_le_r
,
4805 gen_helper_sve_ldnf1sdu_le_r
,
4807 gen_helper_sve_ldnf1bds_r
,
4808 gen_helper_sve_ldnf1bss_r
,
4809 gen_helper_sve_ldnf1bhs_r
,
4810 gen_helper_sve_ldnf1dd_le_r
},
4812 /* mte inactive, big-endian */
4813 { gen_helper_sve_ldnf1bb_r
,
4814 gen_helper_sve_ldnf1bhu_r
,
4815 gen_helper_sve_ldnf1bsu_r
,
4816 gen_helper_sve_ldnf1bdu_r
,
4818 gen_helper_sve_ldnf1sds_be_r
,
4819 gen_helper_sve_ldnf1hh_be_r
,
4820 gen_helper_sve_ldnf1hsu_be_r
,
4821 gen_helper_sve_ldnf1hdu_be_r
,
4823 gen_helper_sve_ldnf1hds_be_r
,
4824 gen_helper_sve_ldnf1hss_be_r
,
4825 gen_helper_sve_ldnf1ss_be_r
,
4826 gen_helper_sve_ldnf1sdu_be_r
,
4828 gen_helper_sve_ldnf1bds_r
,
4829 gen_helper_sve_ldnf1bss_r
,
4830 gen_helper_sve_ldnf1bhs_r
,
4831 gen_helper_sve_ldnf1dd_be_r
} },
4833 { /* mte inactive, little-endian */
4834 { gen_helper_sve_ldnf1bb_r_mte
,
4835 gen_helper_sve_ldnf1bhu_r_mte
,
4836 gen_helper_sve_ldnf1bsu_r_mte
,
4837 gen_helper_sve_ldnf1bdu_r_mte
,
4839 gen_helper_sve_ldnf1sds_le_r_mte
,
4840 gen_helper_sve_ldnf1hh_le_r_mte
,
4841 gen_helper_sve_ldnf1hsu_le_r_mte
,
4842 gen_helper_sve_ldnf1hdu_le_r_mte
,
4844 gen_helper_sve_ldnf1hds_le_r_mte
,
4845 gen_helper_sve_ldnf1hss_le_r_mte
,
4846 gen_helper_sve_ldnf1ss_le_r_mte
,
4847 gen_helper_sve_ldnf1sdu_le_r_mte
,
4849 gen_helper_sve_ldnf1bds_r_mte
,
4850 gen_helper_sve_ldnf1bss_r_mte
,
4851 gen_helper_sve_ldnf1bhs_r_mte
,
4852 gen_helper_sve_ldnf1dd_le_r_mte
},
4854 /* mte inactive, big-endian */
4855 { gen_helper_sve_ldnf1bb_r_mte
,
4856 gen_helper_sve_ldnf1bhu_r_mte
,
4857 gen_helper_sve_ldnf1bsu_r_mte
,
4858 gen_helper_sve_ldnf1bdu_r_mte
,
4860 gen_helper_sve_ldnf1sds_be_r_mte
,
4861 gen_helper_sve_ldnf1hh_be_r_mte
,
4862 gen_helper_sve_ldnf1hsu_be_r_mte
,
4863 gen_helper_sve_ldnf1hdu_be_r_mte
,
4865 gen_helper_sve_ldnf1hds_be_r_mte
,
4866 gen_helper_sve_ldnf1hss_be_r_mte
,
4867 gen_helper_sve_ldnf1ss_be_r_mte
,
4868 gen_helper_sve_ldnf1sdu_be_r_mte
,
4870 gen_helper_sve_ldnf1bds_r_mte
,
4871 gen_helper_sve_ldnf1bss_r_mte
,
4872 gen_helper_sve_ldnf1bhs_r_mte
,
4873 gen_helper_sve_ldnf1dd_be_r_mte
} },
4876 if (sve_access_check(s
)) {
4877 int vsz
= vec_full_reg_size(s
);
4878 int elements
= vsz
>> dtype_esz
[a
->dtype
];
4879 int off
= (a
->imm
* elements
) << dtype_msz(a
->dtype
);
4880 TCGv_i64 addr
= new_tmp_a64(s
);
4882 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
), off
);
4883 do_mem_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, 1, false,
4884 fns
[s
->mte_active
[0]][s
->be_data
== MO_BE
][a
->dtype
]);
4889 static void do_ldrq(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
, int msz
)
4891 static gen_helper_gvec_mem
* const fns
[2][4] = {
4892 { gen_helper_sve_ld1bb_r
, gen_helper_sve_ld1hh_le_r
,
4893 gen_helper_sve_ld1ss_le_r
, gen_helper_sve_ld1dd_le_r
},
4894 { gen_helper_sve_ld1bb_r
, gen_helper_sve_ld1hh_be_r
,
4895 gen_helper_sve_ld1ss_be_r
, gen_helper_sve_ld1dd_be_r
},
4897 unsigned vsz
= vec_full_reg_size(s
);
4902 /* Load the first quadword using the normal predicated load helpers. */
4903 desc
= simd_desc(16, 16, zt
);
4904 t_desc
= tcg_const_i32(desc
);
4906 poff
= pred_full_reg_offset(s
, pg
);
4909 * Zero-extend the first 16 bits of the predicate into a temporary.
4910 * This avoids triggering an assert making sure we don't have bits
4911 * set within a predicate beyond VQ, but we have lowered VQ to 1
4912 * for this load operation.
4914 TCGv_i64 tmp
= tcg_temp_new_i64();
4915 #ifdef HOST_WORDS_BIGENDIAN
4918 tcg_gen_ld16u_i64(tmp
, cpu_env
, poff
);
4920 poff
= offsetof(CPUARMState
, vfp
.preg_tmp
);
4921 tcg_gen_st_i64(tmp
, cpu_env
, poff
);
4922 tcg_temp_free_i64(tmp
);
4925 t_pg
= tcg_temp_new_ptr();
4926 tcg_gen_addi_ptr(t_pg
, cpu_env
, poff
);
4928 fns
[s
->be_data
== MO_BE
][msz
](cpu_env
, t_pg
, addr
, t_desc
);
4930 tcg_temp_free_ptr(t_pg
);
4931 tcg_temp_free_i32(t_desc
);
4933 /* Replicate that first quadword. */
4935 unsigned dofs
= vec_full_reg_offset(s
, zt
);
4936 tcg_gen_gvec_dup_mem(4, dofs
+ 16, dofs
, vsz
- 16, vsz
- 16);
4940 static bool trans_LD1RQ_zprr(DisasContext
*s
, arg_rprr_load
*a
)
4945 if (sve_access_check(s
)) {
4946 int msz
= dtype_msz(a
->dtype
);
4947 TCGv_i64 addr
= new_tmp_a64(s
);
4948 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), msz
);
4949 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4950 do_ldrq(s
, a
->rd
, a
->pg
, addr
, msz
);
4955 static bool trans_LD1RQ_zpri(DisasContext
*s
, arg_rpri_load
*a
)
4957 if (sve_access_check(s
)) {
4958 TCGv_i64 addr
= new_tmp_a64(s
);
4959 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
), a
->imm
* 16);
4960 do_ldrq(s
, a
->rd
, a
->pg
, addr
, dtype_msz(a
->dtype
));
4965 /* Load and broadcast element. */
4966 static bool trans_LD1R_zpri(DisasContext
*s
, arg_rpri_load
*a
)
4968 unsigned vsz
= vec_full_reg_size(s
);
4969 unsigned psz
= pred_full_reg_size(s
);
4970 unsigned esz
= dtype_esz
[a
->dtype
];
4971 unsigned msz
= dtype_msz(a
->dtype
);
4973 TCGv_i64 temp
, clean_addr
;
4975 if (!sve_access_check(s
)) {
4979 over
= gen_new_label();
4981 /* If the guarding predicate has no bits set, no load occurs. */
4983 /* Reduce the pred_esz_masks value simply to reduce the
4984 * size of the code generated here.
4986 uint64_t psz_mask
= MAKE_64BIT_MASK(0, psz
* 8);
4987 temp
= tcg_temp_new_i64();
4988 tcg_gen_ld_i64(temp
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
4989 tcg_gen_andi_i64(temp
, temp
, pred_esz_masks
[esz
] & psz_mask
);
4990 tcg_gen_brcondi_i64(TCG_COND_EQ
, temp
, 0, over
);
4991 tcg_temp_free_i64(temp
);
4993 TCGv_i32 t32
= tcg_temp_new_i32();
4994 find_last_active(s
, t32
, esz
, a
->pg
);
4995 tcg_gen_brcondi_i32(TCG_COND_LT
, t32
, 0, over
);
4996 tcg_temp_free_i32(t32
);
4999 /* Load the data. */
5000 temp
= tcg_temp_new_i64();
5001 tcg_gen_addi_i64(temp
, cpu_reg_sp(s
, a
->rn
), a
->imm
<< msz
);
5002 clean_addr
= gen_mte_check1(s
, temp
, false, true, msz
);
5004 tcg_gen_qemu_ld_i64(temp
, clean_addr
, get_mem_index(s
),
5005 s
->be_data
| dtype_mop
[a
->dtype
]);
5007 /* Broadcast to *all* elements. */
5008 tcg_gen_gvec_dup_i64(esz
, vec_full_reg_offset(s
, a
->rd
),
5010 tcg_temp_free_i64(temp
);
5012 /* Zero the inactive elements. */
5013 gen_set_label(over
);
5014 return do_movz_zpz(s
, a
->rd
, a
->rd
, a
->pg
, esz
, false);
5017 static void do_st_zpa(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
,
5018 int msz
, int esz
, int nreg
)
5020 static gen_helper_gvec_mem
* const fn_single
[2][2][4][4] = {
5021 { { { gen_helper_sve_st1bb_r
,
5022 gen_helper_sve_st1bh_r
,
5023 gen_helper_sve_st1bs_r
,
5024 gen_helper_sve_st1bd_r
},
5026 gen_helper_sve_st1hh_le_r
,
5027 gen_helper_sve_st1hs_le_r
,
5028 gen_helper_sve_st1hd_le_r
},
5030 gen_helper_sve_st1ss_le_r
,
5031 gen_helper_sve_st1sd_le_r
},
5033 gen_helper_sve_st1dd_le_r
} },
5034 { { gen_helper_sve_st1bb_r
,
5035 gen_helper_sve_st1bh_r
,
5036 gen_helper_sve_st1bs_r
,
5037 gen_helper_sve_st1bd_r
},
5039 gen_helper_sve_st1hh_be_r
,
5040 gen_helper_sve_st1hs_be_r
,
5041 gen_helper_sve_st1hd_be_r
},
5043 gen_helper_sve_st1ss_be_r
,
5044 gen_helper_sve_st1sd_be_r
},
5046 gen_helper_sve_st1dd_be_r
} } },
5048 { { { gen_helper_sve_st1bb_r_mte
,
5049 gen_helper_sve_st1bh_r_mte
,
5050 gen_helper_sve_st1bs_r_mte
,
5051 gen_helper_sve_st1bd_r_mte
},
5053 gen_helper_sve_st1hh_le_r_mte
,
5054 gen_helper_sve_st1hs_le_r_mte
,
5055 gen_helper_sve_st1hd_le_r_mte
},
5057 gen_helper_sve_st1ss_le_r_mte
,
5058 gen_helper_sve_st1sd_le_r_mte
},
5060 gen_helper_sve_st1dd_le_r_mte
} },
5061 { { gen_helper_sve_st1bb_r_mte
,
5062 gen_helper_sve_st1bh_r_mte
,
5063 gen_helper_sve_st1bs_r_mte
,
5064 gen_helper_sve_st1bd_r_mte
},
5066 gen_helper_sve_st1hh_be_r_mte
,
5067 gen_helper_sve_st1hs_be_r_mte
,
5068 gen_helper_sve_st1hd_be_r_mte
},
5070 gen_helper_sve_st1ss_be_r_mte
,
5071 gen_helper_sve_st1sd_be_r_mte
},
5073 gen_helper_sve_st1dd_be_r_mte
} } },
5075 static gen_helper_gvec_mem
* const fn_multiple
[2][2][3][4] = {
5076 { { { gen_helper_sve_st2bb_r
,
5077 gen_helper_sve_st2hh_le_r
,
5078 gen_helper_sve_st2ss_le_r
,
5079 gen_helper_sve_st2dd_le_r
},
5080 { gen_helper_sve_st3bb_r
,
5081 gen_helper_sve_st3hh_le_r
,
5082 gen_helper_sve_st3ss_le_r
,
5083 gen_helper_sve_st3dd_le_r
},
5084 { gen_helper_sve_st4bb_r
,
5085 gen_helper_sve_st4hh_le_r
,
5086 gen_helper_sve_st4ss_le_r
,
5087 gen_helper_sve_st4dd_le_r
} },
5088 { { gen_helper_sve_st2bb_r
,
5089 gen_helper_sve_st2hh_be_r
,
5090 gen_helper_sve_st2ss_be_r
,
5091 gen_helper_sve_st2dd_be_r
},
5092 { gen_helper_sve_st3bb_r
,
5093 gen_helper_sve_st3hh_be_r
,
5094 gen_helper_sve_st3ss_be_r
,
5095 gen_helper_sve_st3dd_be_r
},
5096 { gen_helper_sve_st4bb_r
,
5097 gen_helper_sve_st4hh_be_r
,
5098 gen_helper_sve_st4ss_be_r
,
5099 gen_helper_sve_st4dd_be_r
} } },
5100 { { { gen_helper_sve_st2bb_r_mte
,
5101 gen_helper_sve_st2hh_le_r_mte
,
5102 gen_helper_sve_st2ss_le_r_mte
,
5103 gen_helper_sve_st2dd_le_r_mte
},
5104 { gen_helper_sve_st3bb_r_mte
,
5105 gen_helper_sve_st3hh_le_r_mte
,
5106 gen_helper_sve_st3ss_le_r_mte
,
5107 gen_helper_sve_st3dd_le_r_mte
},
5108 { gen_helper_sve_st4bb_r_mte
,
5109 gen_helper_sve_st4hh_le_r_mte
,
5110 gen_helper_sve_st4ss_le_r_mte
,
5111 gen_helper_sve_st4dd_le_r_mte
} },
5112 { { gen_helper_sve_st2bb_r_mte
,
5113 gen_helper_sve_st2hh_be_r_mte
,
5114 gen_helper_sve_st2ss_be_r_mte
,
5115 gen_helper_sve_st2dd_be_r_mte
},
5116 { gen_helper_sve_st3bb_r_mte
,
5117 gen_helper_sve_st3hh_be_r_mte
,
5118 gen_helper_sve_st3ss_be_r_mte
,
5119 gen_helper_sve_st3dd_be_r_mte
},
5120 { gen_helper_sve_st4bb_r_mte
,
5121 gen_helper_sve_st4hh_be_r_mte
,
5122 gen_helper_sve_st4ss_be_r_mte
,
5123 gen_helper_sve_st4dd_be_r_mte
} } },
5125 gen_helper_gvec_mem
*fn
;
5126 int be
= s
->be_data
== MO_BE
;
5130 fn
= fn_single
[s
->mte_active
[0]][be
][msz
][esz
];
5133 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5135 fn
= fn_multiple
[s
->mte_active
[0]][be
][nreg
- 1][msz
];
5138 do_mem_zpa(s
, zt
, pg
, addr
, msz_dtype(s
, msz
), nreg
, true, fn
);
5141 static bool trans_ST_zprr(DisasContext
*s
, arg_rprr_store
*a
)
5143 if (a
->rm
== 31 || a
->msz
> a
->esz
) {
5146 if (sve_access_check(s
)) {
5147 TCGv_i64 addr
= new_tmp_a64(s
);
5148 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), a
->msz
);
5149 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
5150 do_st_zpa(s
, a
->rd
, a
->pg
, addr
, a
->msz
, a
->esz
, a
->nreg
);
5155 static bool trans_ST_zpri(DisasContext
*s
, arg_rpri_store
*a
)
5157 if (a
->msz
> a
->esz
) {
5160 if (sve_access_check(s
)) {
5161 int vsz
= vec_full_reg_size(s
);
5162 int elements
= vsz
>> a
->esz
;
5163 TCGv_i64 addr
= new_tmp_a64(s
);
5165 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
),
5166 (a
->imm
* elements
* (a
->nreg
+ 1)) << a
->msz
);
5167 do_st_zpa(s
, a
->rd
, a
->pg
, addr
, a
->msz
, a
->esz
, a
->nreg
);
/*
 *** SVE gather loads / scatter stores
 */
5176 static void do_mem_zpz(DisasContext
*s
, int zt
, int pg
, int zm
,
5177 int scale
, TCGv_i64 scalar
, int msz
, bool is_write
,
5178 gen_helper_gvec_mem_scatter
*fn
)
5180 unsigned vsz
= vec_full_reg_size(s
);
5181 TCGv_ptr t_zm
= tcg_temp_new_ptr();
5182 TCGv_ptr t_pg
= tcg_temp_new_ptr();
5183 TCGv_ptr t_zt
= tcg_temp_new_ptr();
5187 if (s
->mte_active
[0]) {
5188 desc
= FIELD_DP32(desc
, MTEDESC
, MIDX
, get_mem_index(s
));
5189 desc
= FIELD_DP32(desc
, MTEDESC
, TBI
, s
->tbid
);
5190 desc
= FIELD_DP32(desc
, MTEDESC
, TCMA
, s
->tcma
);
5191 desc
= FIELD_DP32(desc
, MTEDESC
, WRITE
, is_write
);
5192 desc
= FIELD_DP32(desc
, MTEDESC
, ESIZE
, 1 << msz
);
5193 desc
<<= SVE_MTEDESC_SHIFT
;
5195 desc
= simd_desc(vsz
, vsz
, desc
| scale
);
5196 t_desc
= tcg_const_i32(desc
);
5198 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
5199 tcg_gen_addi_ptr(t_zm
, cpu_env
, vec_full_reg_offset(s
, zm
));
5200 tcg_gen_addi_ptr(t_zt
, cpu_env
, vec_full_reg_offset(s
, zt
));
5201 fn(cpu_env
, t_zt
, t_pg
, t_zm
, scalar
, t_desc
);
5203 tcg_temp_free_ptr(t_zt
);
5204 tcg_temp_free_ptr(t_zm
);
5205 tcg_temp_free_ptr(t_pg
);
5206 tcg_temp_free_i32(t_desc
);
5209 /* Indexed by [mte][be][ff][xs][u][msz]. */
5210 static gen_helper_gvec_mem_scatter
* const
5211 gather_load_fn32
[2][2][2][2][2][3] = {
5212 { /* MTE Inactive */
5213 { /* Little-endian */
5214 { { { gen_helper_sve_ldbss_zsu
,
5215 gen_helper_sve_ldhss_le_zsu
,
5217 { gen_helper_sve_ldbsu_zsu
,
5218 gen_helper_sve_ldhsu_le_zsu
,
5219 gen_helper_sve_ldss_le_zsu
, } },
5220 { { gen_helper_sve_ldbss_zss
,
5221 gen_helper_sve_ldhss_le_zss
,
5223 { gen_helper_sve_ldbsu_zss
,
5224 gen_helper_sve_ldhsu_le_zss
,
5225 gen_helper_sve_ldss_le_zss
, } } },
5228 { { { gen_helper_sve_ldffbss_zsu
,
5229 gen_helper_sve_ldffhss_le_zsu
,
5231 { gen_helper_sve_ldffbsu_zsu
,
5232 gen_helper_sve_ldffhsu_le_zsu
,
5233 gen_helper_sve_ldffss_le_zsu
, } },
5234 { { gen_helper_sve_ldffbss_zss
,
5235 gen_helper_sve_ldffhss_le_zss
,
5237 { gen_helper_sve_ldffbsu_zss
,
5238 gen_helper_sve_ldffhsu_le_zss
,
5239 gen_helper_sve_ldffss_le_zss
, } } } },
5242 { { { gen_helper_sve_ldbss_zsu
,
5243 gen_helper_sve_ldhss_be_zsu
,
5245 { gen_helper_sve_ldbsu_zsu
,
5246 gen_helper_sve_ldhsu_be_zsu
,
5247 gen_helper_sve_ldss_be_zsu
, } },
5248 { { gen_helper_sve_ldbss_zss
,
5249 gen_helper_sve_ldhss_be_zss
,
5251 { gen_helper_sve_ldbsu_zss
,
5252 gen_helper_sve_ldhsu_be_zss
,
5253 gen_helper_sve_ldss_be_zss
, } } },
5256 { { { gen_helper_sve_ldffbss_zsu
,
5257 gen_helper_sve_ldffhss_be_zsu
,
5259 { gen_helper_sve_ldffbsu_zsu
,
5260 gen_helper_sve_ldffhsu_be_zsu
,
5261 gen_helper_sve_ldffss_be_zsu
, } },
5262 { { gen_helper_sve_ldffbss_zss
,
5263 gen_helper_sve_ldffhss_be_zss
,
5265 { gen_helper_sve_ldffbsu_zss
,
5266 gen_helper_sve_ldffhsu_be_zss
,
5267 gen_helper_sve_ldffss_be_zss
, } } } } },
5269 { /* Little-endian */
5270 { { { gen_helper_sve_ldbss_zsu_mte
,
5271 gen_helper_sve_ldhss_le_zsu_mte
,
5273 { gen_helper_sve_ldbsu_zsu_mte
,
5274 gen_helper_sve_ldhsu_le_zsu_mte
,
5275 gen_helper_sve_ldss_le_zsu_mte
, } },
5276 { { gen_helper_sve_ldbss_zss_mte
,
5277 gen_helper_sve_ldhss_le_zss_mte
,
5279 { gen_helper_sve_ldbsu_zss_mte
,
5280 gen_helper_sve_ldhsu_le_zss_mte
,
5281 gen_helper_sve_ldss_le_zss_mte
, } } },
5284 { { { gen_helper_sve_ldffbss_zsu_mte
,
5285 gen_helper_sve_ldffhss_le_zsu_mte
,
5287 { gen_helper_sve_ldffbsu_zsu_mte
,
5288 gen_helper_sve_ldffhsu_le_zsu_mte
,
5289 gen_helper_sve_ldffss_le_zsu_mte
, } },
5290 { { gen_helper_sve_ldffbss_zss_mte
,
5291 gen_helper_sve_ldffhss_le_zss_mte
,
5293 { gen_helper_sve_ldffbsu_zss_mte
,
5294 gen_helper_sve_ldffhsu_le_zss_mte
,
5295 gen_helper_sve_ldffss_le_zss_mte
, } } } },
5298 { { { gen_helper_sve_ldbss_zsu_mte
,
5299 gen_helper_sve_ldhss_be_zsu_mte
,
5301 { gen_helper_sve_ldbsu_zsu_mte
,
5302 gen_helper_sve_ldhsu_be_zsu_mte
,
5303 gen_helper_sve_ldss_be_zsu_mte
, } },
5304 { { gen_helper_sve_ldbss_zss_mte
,
5305 gen_helper_sve_ldhss_be_zss_mte
,
5307 { gen_helper_sve_ldbsu_zss_mte
,
5308 gen_helper_sve_ldhsu_be_zss_mte
,
5309 gen_helper_sve_ldss_be_zss_mte
, } } },
5312 { { { gen_helper_sve_ldffbss_zsu_mte
,
5313 gen_helper_sve_ldffhss_be_zsu_mte
,
5315 { gen_helper_sve_ldffbsu_zsu_mte
,
5316 gen_helper_sve_ldffhsu_be_zsu_mte
,
5317 gen_helper_sve_ldffss_be_zsu_mte
, } },
5318 { { gen_helper_sve_ldffbss_zss_mte
,
5319 gen_helper_sve_ldffhss_be_zss_mte
,
5321 { gen_helper_sve_ldffbsu_zss_mte
,
5322 gen_helper_sve_ldffhsu_be_zss_mte
,
5323 gen_helper_sve_ldffss_be_zss_mte
, } } } } },
5326 /* Note that we overload xs=2 to indicate 64-bit offset. */
5327 static gen_helper_gvec_mem_scatter
* const
5328 gather_load_fn64
[2][2][2][3][2][4] = {
5329 { /* MTE Inactive */
5330 { /* Little-endian */
5331 { { { gen_helper_sve_ldbds_zsu
,
5332 gen_helper_sve_ldhds_le_zsu
,
5333 gen_helper_sve_ldsds_le_zsu
,
5335 { gen_helper_sve_ldbdu_zsu
,
5336 gen_helper_sve_ldhdu_le_zsu
,
5337 gen_helper_sve_ldsdu_le_zsu
,
5338 gen_helper_sve_lddd_le_zsu
, } },
5339 { { gen_helper_sve_ldbds_zss
,
5340 gen_helper_sve_ldhds_le_zss
,
5341 gen_helper_sve_ldsds_le_zss
,
5343 { gen_helper_sve_ldbdu_zss
,
5344 gen_helper_sve_ldhdu_le_zss
,
5345 gen_helper_sve_ldsdu_le_zss
,
5346 gen_helper_sve_lddd_le_zss
, } },
5347 { { gen_helper_sve_ldbds_zd
,
5348 gen_helper_sve_ldhds_le_zd
,
5349 gen_helper_sve_ldsds_le_zd
,
5351 { gen_helper_sve_ldbdu_zd
,
5352 gen_helper_sve_ldhdu_le_zd
,
5353 gen_helper_sve_ldsdu_le_zd
,
5354 gen_helper_sve_lddd_le_zd
, } } },
5357 { { { gen_helper_sve_ldffbds_zsu
,
5358 gen_helper_sve_ldffhds_le_zsu
,
5359 gen_helper_sve_ldffsds_le_zsu
,
5361 { gen_helper_sve_ldffbdu_zsu
,
5362 gen_helper_sve_ldffhdu_le_zsu
,
5363 gen_helper_sve_ldffsdu_le_zsu
,
5364 gen_helper_sve_ldffdd_le_zsu
, } },
5365 { { gen_helper_sve_ldffbds_zss
,
5366 gen_helper_sve_ldffhds_le_zss
,
5367 gen_helper_sve_ldffsds_le_zss
,
5369 { gen_helper_sve_ldffbdu_zss
,
5370 gen_helper_sve_ldffhdu_le_zss
,
5371 gen_helper_sve_ldffsdu_le_zss
,
5372 gen_helper_sve_ldffdd_le_zss
, } },
5373 { { gen_helper_sve_ldffbds_zd
,
5374 gen_helper_sve_ldffhds_le_zd
,
5375 gen_helper_sve_ldffsds_le_zd
,
5377 { gen_helper_sve_ldffbdu_zd
,
5378 gen_helper_sve_ldffhdu_le_zd
,
5379 gen_helper_sve_ldffsdu_le_zd
,
5380 gen_helper_sve_ldffdd_le_zd
, } } } },
5382 { { { gen_helper_sve_ldbds_zsu
,
5383 gen_helper_sve_ldhds_be_zsu
,
5384 gen_helper_sve_ldsds_be_zsu
,
5386 { gen_helper_sve_ldbdu_zsu
,
5387 gen_helper_sve_ldhdu_be_zsu
,
5388 gen_helper_sve_ldsdu_be_zsu
,
5389 gen_helper_sve_lddd_be_zsu
, } },
5390 { { gen_helper_sve_ldbds_zss
,
5391 gen_helper_sve_ldhds_be_zss
,
5392 gen_helper_sve_ldsds_be_zss
,
5394 { gen_helper_sve_ldbdu_zss
,
5395 gen_helper_sve_ldhdu_be_zss
,
5396 gen_helper_sve_ldsdu_be_zss
,
5397 gen_helper_sve_lddd_be_zss
, } },
5398 { { gen_helper_sve_ldbds_zd
,
5399 gen_helper_sve_ldhds_be_zd
,
5400 gen_helper_sve_ldsds_be_zd
,
5402 { gen_helper_sve_ldbdu_zd
,
5403 gen_helper_sve_ldhdu_be_zd
,
5404 gen_helper_sve_ldsdu_be_zd
,
5405 gen_helper_sve_lddd_be_zd
, } } },
5408 { { { gen_helper_sve_ldffbds_zsu
,
5409 gen_helper_sve_ldffhds_be_zsu
,
5410 gen_helper_sve_ldffsds_be_zsu
,
5412 { gen_helper_sve_ldffbdu_zsu
,
5413 gen_helper_sve_ldffhdu_be_zsu
,
5414 gen_helper_sve_ldffsdu_be_zsu
,
5415 gen_helper_sve_ldffdd_be_zsu
, } },
5416 { { gen_helper_sve_ldffbds_zss
,
5417 gen_helper_sve_ldffhds_be_zss
,
5418 gen_helper_sve_ldffsds_be_zss
,
5420 { gen_helper_sve_ldffbdu_zss
,
5421 gen_helper_sve_ldffhdu_be_zss
,
5422 gen_helper_sve_ldffsdu_be_zss
,
5423 gen_helper_sve_ldffdd_be_zss
, } },
5424 { { gen_helper_sve_ldffbds_zd
,
5425 gen_helper_sve_ldffhds_be_zd
,
5426 gen_helper_sve_ldffsds_be_zd
,
5428 { gen_helper_sve_ldffbdu_zd
,
5429 gen_helper_sve_ldffhdu_be_zd
,
5430 gen_helper_sve_ldffsdu_be_zd
,
5431 gen_helper_sve_ldffdd_be_zd
, } } } } },
5433 { /* Little-endian */
5434 { { { gen_helper_sve_ldbds_zsu_mte
,
5435 gen_helper_sve_ldhds_le_zsu_mte
,
5436 gen_helper_sve_ldsds_le_zsu_mte
,
5438 { gen_helper_sve_ldbdu_zsu_mte
,
5439 gen_helper_sve_ldhdu_le_zsu_mte
,
5440 gen_helper_sve_ldsdu_le_zsu_mte
,
5441 gen_helper_sve_lddd_le_zsu_mte
, } },
5442 { { gen_helper_sve_ldbds_zss_mte
,
5443 gen_helper_sve_ldhds_le_zss_mte
,
5444 gen_helper_sve_ldsds_le_zss_mte
,
5446 { gen_helper_sve_ldbdu_zss_mte
,
5447 gen_helper_sve_ldhdu_le_zss_mte
,
5448 gen_helper_sve_ldsdu_le_zss_mte
,
5449 gen_helper_sve_lddd_le_zss_mte
, } },
5450 { { gen_helper_sve_ldbds_zd_mte
,
5451 gen_helper_sve_ldhds_le_zd_mte
,
5452 gen_helper_sve_ldsds_le_zd_mte
,
5454 { gen_helper_sve_ldbdu_zd_mte
,
5455 gen_helper_sve_ldhdu_le_zd_mte
,
5456 gen_helper_sve_ldsdu_le_zd_mte
,
5457 gen_helper_sve_lddd_le_zd_mte
, } } },
5460 { { { gen_helper_sve_ldffbds_zsu_mte
,
5461 gen_helper_sve_ldffhds_le_zsu_mte
,
5462 gen_helper_sve_ldffsds_le_zsu_mte
,
5464 { gen_helper_sve_ldffbdu_zsu_mte
,
5465 gen_helper_sve_ldffhdu_le_zsu_mte
,
5466 gen_helper_sve_ldffsdu_le_zsu_mte
,
5467 gen_helper_sve_ldffdd_le_zsu_mte
, } },
5468 { { gen_helper_sve_ldffbds_zss_mte
,
5469 gen_helper_sve_ldffhds_le_zss_mte
,
5470 gen_helper_sve_ldffsds_le_zss_mte
,
5472 { gen_helper_sve_ldffbdu_zss_mte
,
5473 gen_helper_sve_ldffhdu_le_zss_mte
,
5474 gen_helper_sve_ldffsdu_le_zss_mte
,
5475 gen_helper_sve_ldffdd_le_zss_mte
, } },
5476 { { gen_helper_sve_ldffbds_zd_mte
,
5477 gen_helper_sve_ldffhds_le_zd_mte
,
5478 gen_helper_sve_ldffsds_le_zd_mte
,
5480 { gen_helper_sve_ldffbdu_zd_mte
,
5481 gen_helper_sve_ldffhdu_le_zd_mte
,
5482 gen_helper_sve_ldffsdu_le_zd_mte
,
5483 gen_helper_sve_ldffdd_le_zd_mte
, } } } },
5485 { { { gen_helper_sve_ldbds_zsu_mte
,
5486 gen_helper_sve_ldhds_be_zsu_mte
,
5487 gen_helper_sve_ldsds_be_zsu_mte
,
5489 { gen_helper_sve_ldbdu_zsu_mte
,
5490 gen_helper_sve_ldhdu_be_zsu_mte
,
5491 gen_helper_sve_ldsdu_be_zsu_mte
,
5492 gen_helper_sve_lddd_be_zsu_mte
, } },
5493 { { gen_helper_sve_ldbds_zss_mte
,
5494 gen_helper_sve_ldhds_be_zss_mte
,
5495 gen_helper_sve_ldsds_be_zss_mte
,
5497 { gen_helper_sve_ldbdu_zss_mte
,
5498 gen_helper_sve_ldhdu_be_zss_mte
,
5499 gen_helper_sve_ldsdu_be_zss_mte
,
5500 gen_helper_sve_lddd_be_zss_mte
, } },
5501 { { gen_helper_sve_ldbds_zd_mte
,
5502 gen_helper_sve_ldhds_be_zd_mte
,
5503 gen_helper_sve_ldsds_be_zd_mte
,
5505 { gen_helper_sve_ldbdu_zd_mte
,
5506 gen_helper_sve_ldhdu_be_zd_mte
,
5507 gen_helper_sve_ldsdu_be_zd_mte
,
5508 gen_helper_sve_lddd_be_zd_mte
, } } },
5511 { { { gen_helper_sve_ldffbds_zsu_mte
,
5512 gen_helper_sve_ldffhds_be_zsu_mte
,
5513 gen_helper_sve_ldffsds_be_zsu_mte
,
5515 { gen_helper_sve_ldffbdu_zsu_mte
,
5516 gen_helper_sve_ldffhdu_be_zsu_mte
,
5517 gen_helper_sve_ldffsdu_be_zsu_mte
,
5518 gen_helper_sve_ldffdd_be_zsu_mte
, } },
5519 { { gen_helper_sve_ldffbds_zss_mte
,
5520 gen_helper_sve_ldffhds_be_zss_mte
,
5521 gen_helper_sve_ldffsds_be_zss_mte
,
5523 { gen_helper_sve_ldffbdu_zss_mte
,
5524 gen_helper_sve_ldffhdu_be_zss_mte
,
5525 gen_helper_sve_ldffsdu_be_zss_mte
,
5526 gen_helper_sve_ldffdd_be_zss_mte
, } },
5527 { { gen_helper_sve_ldffbds_zd_mte
,
5528 gen_helper_sve_ldffhds_be_zd_mte
,
5529 gen_helper_sve_ldffsds_be_zd_mte
,
5531 { gen_helper_sve_ldffbdu_zd_mte
,
5532 gen_helper_sve_ldffhdu_be_zd_mte
,
5533 gen_helper_sve_ldffsdu_be_zd_mte
,
5534 gen_helper_sve_ldffdd_be_zd_mte
, } } } } },
5537 static bool trans_LD1_zprz(DisasContext
*s
, arg_LD1_zprz
*a
)
5539 gen_helper_gvec_mem_scatter
*fn
= NULL
;
5540 bool be
= s
->be_data
== MO_BE
;
5541 bool mte
= s
->mte_active
[0];
5543 if (!sve_access_check(s
)) {
5549 fn
= gather_load_fn32
[mte
][be
][a
->ff
][a
->xs
][a
->u
][a
->msz
];
5552 fn
= gather_load_fn64
[mte
][be
][a
->ff
][a
->xs
][a
->u
][a
->msz
];
5557 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rm
, a
->scale
* a
->msz
,
5558 cpu_reg_sp(s
, a
->rn
), a
->msz
, false, fn
);
5562 static bool trans_LD1_zpiz(DisasContext
*s
, arg_LD1_zpiz
*a
)
5564 gen_helper_gvec_mem_scatter
*fn
= NULL
;
5565 bool be
= s
->be_data
== MO_BE
;
5566 bool mte
= s
->mte_active
[0];
5569 if (a
->esz
< a
->msz
|| (a
->esz
== a
->msz
&& !a
->u
)) {
5572 if (!sve_access_check(s
)) {
5578 fn
= gather_load_fn32
[mte
][be
][a
->ff
][0][a
->u
][a
->msz
];
5581 fn
= gather_load_fn64
[mte
][be
][a
->ff
][2][a
->u
][a
->msz
];
5586 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5587 * by loading the immediate into the scalar parameter.
5589 imm
= tcg_const_i64(a
->imm
<< a
->msz
);
5590 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rn
, 0, imm
, a
->msz
, false, fn
);
5591 tcg_temp_free_i64(imm
);
5595 /* Indexed by [mte][be][xs][msz]. */
5596 static gen_helper_gvec_mem_scatter
* const scatter_store_fn32
[2][2][2][3] = {
5597 { /* MTE Inactive */
5598 { /* Little-endian */
5599 { gen_helper_sve_stbs_zsu
,
5600 gen_helper_sve_sths_le_zsu
,
5601 gen_helper_sve_stss_le_zsu
, },
5602 { gen_helper_sve_stbs_zss
,
5603 gen_helper_sve_sths_le_zss
,
5604 gen_helper_sve_stss_le_zss
, } },
5606 { gen_helper_sve_stbs_zsu
,
5607 gen_helper_sve_sths_be_zsu
,
5608 gen_helper_sve_stss_be_zsu
, },
5609 { gen_helper_sve_stbs_zss
,
5610 gen_helper_sve_sths_be_zss
,
5611 gen_helper_sve_stss_be_zss
, } } },
5613 { /* Little-endian */
5614 { gen_helper_sve_stbs_zsu_mte
,
5615 gen_helper_sve_sths_le_zsu_mte
,
5616 gen_helper_sve_stss_le_zsu_mte
, },
5617 { gen_helper_sve_stbs_zss_mte
,
5618 gen_helper_sve_sths_le_zss_mte
,
5619 gen_helper_sve_stss_le_zss_mte
, } },
5621 { gen_helper_sve_stbs_zsu_mte
,
5622 gen_helper_sve_sths_be_zsu_mte
,
5623 gen_helper_sve_stss_be_zsu_mte
, },
5624 { gen_helper_sve_stbs_zss_mte
,
5625 gen_helper_sve_sths_be_zss_mte
,
5626 gen_helper_sve_stss_be_zss_mte
, } } },
5629 /* Note that we overload xs=2 to indicate 64-bit offset. */
5630 static gen_helper_gvec_mem_scatter
* const scatter_store_fn64
[2][2][3][4] = {
5631 { /* MTE Inactive */
5632 { /* Little-endian */
5633 { gen_helper_sve_stbd_zsu
,
5634 gen_helper_sve_sthd_le_zsu
,
5635 gen_helper_sve_stsd_le_zsu
,
5636 gen_helper_sve_stdd_le_zsu
, },
5637 { gen_helper_sve_stbd_zss
,
5638 gen_helper_sve_sthd_le_zss
,
5639 gen_helper_sve_stsd_le_zss
,
5640 gen_helper_sve_stdd_le_zss
, },
5641 { gen_helper_sve_stbd_zd
,
5642 gen_helper_sve_sthd_le_zd
,
5643 gen_helper_sve_stsd_le_zd
,
5644 gen_helper_sve_stdd_le_zd
, } },
5646 { gen_helper_sve_stbd_zsu
,
5647 gen_helper_sve_sthd_be_zsu
,
5648 gen_helper_sve_stsd_be_zsu
,
5649 gen_helper_sve_stdd_be_zsu
, },
5650 { gen_helper_sve_stbd_zss
,
5651 gen_helper_sve_sthd_be_zss
,
5652 gen_helper_sve_stsd_be_zss
,
5653 gen_helper_sve_stdd_be_zss
, },
5654 { gen_helper_sve_stbd_zd
,
5655 gen_helper_sve_sthd_be_zd
,
5656 gen_helper_sve_stsd_be_zd
,
5657 gen_helper_sve_stdd_be_zd
, } } },
5658 { /* MTE Inactive */
5659 { /* Little-endian */
5660 { gen_helper_sve_stbd_zsu_mte
,
5661 gen_helper_sve_sthd_le_zsu_mte
,
5662 gen_helper_sve_stsd_le_zsu_mte
,
5663 gen_helper_sve_stdd_le_zsu_mte
, },
5664 { gen_helper_sve_stbd_zss_mte
,
5665 gen_helper_sve_sthd_le_zss_mte
,
5666 gen_helper_sve_stsd_le_zss_mte
,
5667 gen_helper_sve_stdd_le_zss_mte
, },
5668 { gen_helper_sve_stbd_zd_mte
,
5669 gen_helper_sve_sthd_le_zd_mte
,
5670 gen_helper_sve_stsd_le_zd_mte
,
5671 gen_helper_sve_stdd_le_zd_mte
, } },
5673 { gen_helper_sve_stbd_zsu_mte
,
5674 gen_helper_sve_sthd_be_zsu_mte
,
5675 gen_helper_sve_stsd_be_zsu_mte
,
5676 gen_helper_sve_stdd_be_zsu_mte
, },
5677 { gen_helper_sve_stbd_zss_mte
,
5678 gen_helper_sve_sthd_be_zss_mte
,
5679 gen_helper_sve_stsd_be_zss_mte
,
5680 gen_helper_sve_stdd_be_zss_mte
, },
5681 { gen_helper_sve_stbd_zd_mte
,
5682 gen_helper_sve_sthd_be_zd_mte
,
5683 gen_helper_sve_stsd_be_zd_mte
,
5684 gen_helper_sve_stdd_be_zd_mte
, } } },
5687 static bool trans_ST1_zprz(DisasContext
*s
, arg_ST1_zprz
*a
)
5689 gen_helper_gvec_mem_scatter
*fn
;
5690 bool be
= s
->be_data
== MO_BE
;
5691 bool mte
= s
->mte_active
[0];
5693 if (a
->esz
< a
->msz
|| (a
->msz
== 0 && a
->scale
)) {
5696 if (!sve_access_check(s
)) {
5701 fn
= scatter_store_fn32
[mte
][be
][a
->xs
][a
->msz
];
5704 fn
= scatter_store_fn64
[mte
][be
][a
->xs
][a
->msz
];
5707 g_assert_not_reached();
5709 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rm
, a
->scale
* a
->msz
,
5710 cpu_reg_sp(s
, a
->rn
), a
->msz
, true, fn
);
5714 static bool trans_ST1_zpiz(DisasContext
*s
, arg_ST1_zpiz
*a
)
5716 gen_helper_gvec_mem_scatter
*fn
= NULL
;
5717 bool be
= s
->be_data
== MO_BE
;
5718 bool mte
= s
->mte_active
[0];
5721 if (a
->esz
< a
->msz
) {
5724 if (!sve_access_check(s
)) {
5730 fn
= scatter_store_fn32
[mte
][be
][0][a
->msz
];
5733 fn
= scatter_store_fn64
[mte
][be
][2][a
->msz
];
5738 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5739 * by loading the immediate into the scalar parameter.
5741 imm
= tcg_const_i64(a
->imm
<< a
->msz
);
5742 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rn
, 0, imm
, a
->msz
, true, fn
);
5743 tcg_temp_free_i64(imm
);
5751 static bool trans_PRF(DisasContext
*s
, arg_PRF
*a
)
5753 /* Prefetch is a nop within QEMU. */
5754 (void)sve_access_check(s
);
5758 static bool trans_PRF_rr(DisasContext
*s
, arg_PRF_rr
*a
)
5763 /* Prefetch is a nop within QEMU. */
5764 (void)sve_access_check(s
);
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */
5782 static bool trans_MOVPRFX(DisasContext
*s
, arg_MOVPRFX
*a
)
5784 return do_mov_z(s
, a
->rd
, a
->rn
);
5787 static bool trans_MOVPRFX_m(DisasContext
*s
, arg_rpr_esz
*a
)
5789 if (sve_access_check(s
)) {
5790 do_sel_z(s
, a
->rd
, a
->rn
, a
->rd
, a
->pg
, a
->esz
);
5795 static bool trans_MOVPRFX_z(DisasContext
*s
, arg_rpr_esz
*a
)
5797 return do_movz_zpz(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
, false);