/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated).  */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

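/* Worked example for the decode helpers above: a byte-sized ASR #3 is
 * encoded with tsz:imm3 = 0b01_101, i.e. x = 13; tszimm_esz(13) = 0 and
 * tszimm_shr(13) = (16 << 0) - 13 = 3.
 */
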
static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

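/* E.g. the 9-bit immediate 0x1ff has SH set with imm8 = 0xff, expanding
 * to 0xff00 in the unsigned form and to -256 in the signed form.
 */
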
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

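/* The table above is simply dtype = msz * 5, selecting the dtype encodings
 * whose memory size equals the element size, i.e. no extension is involved.
 */
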
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

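/* The predtest helpers below return N in bit 31 and C in bit 0, with bit 1
 * set when Z should be clear; QEMU's ZF holds a value that is zero exactly
 * when Z is set, so the masking above unpacks NZCV directly.
 */
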
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

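/* Each predicate bit governs one byte of vector data, so for esz > 0 only
 * every (1 << esz)-th bit is significant: e.g. 0x5555... for 16-bit elements.
 */
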
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

543 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
544 static bool do_vpz_ool(DisasContext
*s
, arg_rpr_esz
*a
,
545 gen_helper_gvec_reduc
*fn
)
547 unsigned vsz
= vec_full_reg_size(s
);
555 if (!sve_access_check(s
)) {
559 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
560 temp
= tcg_temp_new_i64();
561 t_zn
= tcg_temp_new_ptr();
562 t_pg
= tcg_temp_new_ptr();
564 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
565 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
566 fn(temp
, t_zn
, t_pg
, desc
);
567 tcg_temp_free_ptr(t_zn
);
568 tcg_temp_free_ptr(t_pg
);
569 tcg_temp_free_i32(desc
);
571 write_fp_dreg(s
, a
->rd
, temp
);
572 tcg_temp_free_i64(temp
);
576 #define DO_VPZ(NAME, name) \
577 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
579 static gen_helper_gvec_reduc * const fns[4] = { \
580 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
581 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
583 return do_vpz_ool(s, a, fns[a->esz]); \
596 static bool trans_SADDV(DisasContext
*s
, arg_rpr_esz
*a
)
598 static gen_helper_gvec_reduc
* const fns
[4] = {
599 gen_helper_sve_saddv_b
, gen_helper_sve_saddv_h
,
600 gen_helper_sve_saddv_s
, NULL
602 return do_vpz_ool(s
, a
, fns
[a
->esz
]);
608 *** SVE Shift by Immediate - Predicated Group
611 /* Store zero into every active element of Zd. We will use this for two
612 * and three-operand predicated instructions for which logic dictates a
615 static bool do_clr_zp(DisasContext
*s
, int rd
, int pg
, int esz
)
617 static gen_helper_gvec_2
* const fns
[4] = {
618 gen_helper_sve_clr_b
, gen_helper_sve_clr_h
,
619 gen_helper_sve_clr_s
, gen_helper_sve_clr_d
,
621 if (sve_access_check(s
)) {
622 unsigned vsz
= vec_full_reg_size(s
);
623 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
624 pred_full_reg_offset(s
, pg
),
625 vsz
, vsz
, 0, fns
[esz
]);
630 /* Copy Zn into Zd, storing zeros into inactive elements. */
631 static void do_movz_zpz(DisasContext
*s
, int rd
, int rn
, int pg
, int esz
)
633 static gen_helper_gvec_3
* const fns
[4] = {
634 gen_helper_sve_movz_b
, gen_helper_sve_movz_h
,
635 gen_helper_sve_movz_s
, gen_helper_sve_movz_d
,
637 unsigned vsz
= vec_full_reg_size(s
);
638 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, rd
),
639 vec_full_reg_offset(s
, rn
),
640 pred_full_reg_offset(s
, pg
),
641 vsz
, vsz
, 0, fns
[esz
]);
644 static bool do_zpzi_ool(DisasContext
*s
, arg_rpri_esz
*a
,
645 gen_helper_gvec_3
*fn
)
647 if (sve_access_check(s
)) {
648 unsigned vsz
= vec_full_reg_size(s
);
649 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
650 vec_full_reg_offset(s
, a
->rn
),
651 pred_full_reg_offset(s
, a
->pg
),
652 vsz
, vsz
, a
->imm
, fn
);
657 static bool trans_ASR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
659 static gen_helper_gvec_3
* const fns
[4] = {
660 gen_helper_sve_asr_zpzi_b
, gen_helper_sve_asr_zpzi_h
,
661 gen_helper_sve_asr_zpzi_s
, gen_helper_sve_asr_zpzi_d
,
664 /* Invalid tsz encoding -- see tszimm_esz. */
667 /* Shift by element size is architecturally valid. For
668 arithmetic right-shift, it's the same as by one less. */
669 a
->imm
= MIN(a
->imm
, (8 << a
->esz
) - 1);
670 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
673 static bool trans_LSR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
675 static gen_helper_gvec_3
* const fns
[4] = {
676 gen_helper_sve_lsr_zpzi_b
, gen_helper_sve_lsr_zpzi_h
,
677 gen_helper_sve_lsr_zpzi_s
, gen_helper_sve_lsr_zpzi_d
,
682 /* Shift by element size is architecturally valid.
683 For logical shifts, it is a zeroing operation. */
684 if (a
->imm
>= (8 << a
->esz
)) {
685 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
687 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
691 static bool trans_LSL_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
693 static gen_helper_gvec_3
* const fns
[4] = {
694 gen_helper_sve_lsl_zpzi_b
, gen_helper_sve_lsl_zpzi_h
,
695 gen_helper_sve_lsl_zpzi_s
, gen_helper_sve_lsl_zpzi_d
,
700 /* Shift by element size is architecturally valid.
701 For logical shifts, it is a zeroing operation. */
702 if (a
->imm
>= (8 << a
->esz
)) {
703 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
705 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
709 static bool trans_ASRD(DisasContext
*s
, arg_rpri_esz
*a
)
711 static gen_helper_gvec_3
* const fns
[4] = {
712 gen_helper_sve_asrd_b
, gen_helper_sve_asrd_h
,
713 gen_helper_sve_asrd_s
, gen_helper_sve_asrd_d
,
718 /* Shift by element size is architecturally valid. For arithmetic
719 right shift for division, it is a zeroing operation. */
720 if (a
->imm
>= (8 << a
->esz
)) {
721 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
723 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

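/* INDEX writes rd[i] = start + i * incr into each element; the b/h/s forms
 * above need only the low 32 bits of the two scalar operands.
 */
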
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

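/* For example, a 256-bit vector with esz == 1 has 16 halfword elements:
 * POW2 gives 16, VL7 gives 7, MUL3 gives 15, and VL32 gives 0 because the
 * bound of 32 exceeds the element count.
 */
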
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

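/* For PTRUES the mask and the result of PredTest are the same register,
 * so the flags reduce to constants: N = !Z = "some element is active" and
 * C = "no element is active", which is what the movi sequence above encodes.
 */
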
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

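/* The signed cases below use the usual two's-complement overflow tests:
 * for a - b, overflow occurred iff (a ^ b) & (a ^ (a - b)) has its sign
 * bit set; for a + b, iff (a ^ (a + b)) & ~(a ^ b) does.  A movcond on
 * that sign bit then clamps the result to INT64_MIN or INT64_MAX.
 */
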
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

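/* EXT extracts vsz bytes starting at byte n_ofs of the pair (Zn, Zm)
 * viewed as a 2 * vsz byte value with Zn in the low half, so the fast
 * path above is a move of the top of Zn followed by the bottom of Zm.
 */
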
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

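/* DUP (element) encodes the element size as the least significant set bit
 * of imm and the index in the bits above it; an index beyond the current
 * vector length yields zeros, per the architecture.
 */
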
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

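/* UNPKLO/UNPKHI widen from the low or high half of Zn respectively, hence
 * the vsz / 2 byte offset selected by a->h above.
 */
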
/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}

/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

2339 *** SVE Permute Vector - Predicated Group
2342 static bool trans_COMPACT(DisasContext
*s
, arg_rpr_esz
*a
)
2344 static gen_helper_gvec_3
* const fns
[4] = {
2345 NULL
, NULL
, gen_helper_sve_compact_s
, gen_helper_sve_compact_d
2347 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
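
/* COMPACT is only defined for word and doubleword elements, so the NULL
 * entries above make do_zpz_ool reject the byte and halfword encodings
 * as unallocated.
 */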
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
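
/* Example: with 4-byte elements and a 48-byte vector, an offset of 44
 * increments to 48, which the TCG_COND_GEU movcond above wraps back to
 * 0, since 48 is not a power of 2 and cannot be masked.
 */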
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST].  */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}

/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
/* Compute CLAST for a scalar.  */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg.  */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}

/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
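
/* The zero-extension above matters: when no element is active,
 * do_clast_scalar leaves the old register value in place, and the Xreg
 * forms of CLAST define that value as the low ESZ bits zero-extended.
 */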
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}

/* Compute LAST for a scalar.  */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}

static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}

/*
 *** SVE Integer Compare - Vectors Group
 */

static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
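
/* Note: the flags helpers both consume the descriptor in T and return
 * the NZCV result there, which is why the same i32 temp is passed as
 * the first and last argument above.
 */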
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
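
/* The _ppzw ("wide") forms compare each element against 64-bit elements
 * of Zm, so there is no doubleword variant: fns[3] stays NULL and
 * do_ppzz_flags rejects that encoding.
 */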
/*
 *** SVE Integer Compare - Immediate Groups
 */

static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
/*
 *** SVE Partition Break Group
 */

static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}

static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
/*
 *** SVE Predicate Count Group
 */

static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}

static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}

static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}

static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
/*
 *** SVE Integer Compare Scalars Group
 */

static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}

static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    tcg_gen_sub_i64(t0, op1, op0);

    tmax = tcg_const_i64(vsz >> a->esz);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /* If op1 is max (un)signed integer (and the only time the addition
         * above could overflow), then we produce an all-true predicate by
         * setting the count to the vector length.  This is because the
         * pseudocode is described as an increment + compare loop, and the
         * max integer would always compare true.
         */
        tcg_gen_movi_i64(t1, (a->sf
                              ? (a->u ? UINT64_MAX : INT64_MAX)
                              : (a->u ? UINT32_MAX : INT32_MAX)));
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
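
/* Worked example: WHILELT with op0 = 5 and op1 = 8 gives t0 = 3 active
 * elements; the shift by esz then converts that element count to
 * predicate bits (one bit per byte of element) for gen_helper_sve_while.
 */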
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate.  */
        imm = vfp_expand_imm(a->esz, a->imm);
        imm = dup_const(a->esz, imm);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
    }
    return true;
}

static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
    }
    return true;
}

static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}

static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opc = INDEX_op_sub_vec,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opc = INDEX_op_sub_vec,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opc = INDEX_op_sub_vec,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opc = INDEX_op_sub_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_const_i64(a->imm);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}

static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);

        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            c, vsz, vsz, 0, fn);
        tcg_temp_free_i64(c);
    }
    return true;
}

#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMAX, umax)
DO_ZZI(UMIN, umin)

#undef DO_ZZI

static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
{
    static gen_helper_gvec_3 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->u][a->sz]);
    }
    return true;
}

static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
{
    static gen_helper_gvec_3 * const fns[2][2] = {
        { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
        { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->index, fns[a->u][a->sz]);
    }
    return true;
}
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | a->sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/*
 *** SVE Floating Point Multiply Indexed Group
 */

static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = get_fpstatus_ptr(a->esz == MO_16);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
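
/* The descriptor above carries the real vector size as oprsz and the
 * power-of-2 ceiling p2vsz as maxsz, presumably so the helper can run
 * a complete pairwise reduction over a power-of-2 block regardless of
 * the configured vector length.
 */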
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}

static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frecpe_h,
        gen_helper_gvec_frecpe_s,
        gen_helper_gvec_frecpe_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}

static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frsqrte_h,
        gen_helper_gvec_frsqrte_s,
        gen_helper_gvec_frsqrte_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}

/*
 *** SVE Floating Point Compare with Zero Group
 */

static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}

#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
/*
 *** SVE floating-point trig multiply-add coefficient
 */

static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/*
 *** SVE Floating Point Accumulating Reduction Group
 */

static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3

/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                       gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3

typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = get_fpstatus_ptr(is_fp16);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
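
/* The scalar operand is always handed over as a full TCGv_i64; each
 * fp2scalar helper consumes only the low 1 << esz bytes of it.
 */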
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    TCGv_i64 temp = tcg_const_i64(imm);
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
    tcg_temp_free_i64(temp);
}

#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

#define float16_two  make_float16(0x4000)
#define float32_two  make_float32(0x40000000)
#define float64_two  make_float64(0x4000000000000000ULL)

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM

static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
                      gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)   \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fp_cmp(s, a, fns[a->esz]);                            \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);

static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned desc;
    TCGv_i32 t_desc;
    TCGv_ptr pg = tcg_temp_new_ptr();

    /* We would need 7 operands to pass these arguments "properly".
     * So we encode all the register numbers into the descriptor.
     */
    desc = deposit32(a->rd, 5, 5, a->rn);
    desc = deposit32(desc, 10, 5, a->rm);
    desc = deposit32(desc, 15, 5, a->ra);
    desc = simd_desc(vsz, vsz, desc);

    t_desc = tcg_const_i32(desc);
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(cpu_env, pg, t_desc);
    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(pg);
    return true;
}
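
/* Descriptor layout used above, packed into the simd_data field:
 * zd in bits [4:0], zn in [9:5], zm in [14:10], za in [19:15].
 */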
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)         \
{                                                                   \
    static gen_helper_sve_fmla * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fmla(s, a, fns[a->esz]);                              \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA

static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor.
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}

static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(is_fp16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}

static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}

static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_up);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_down);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_ties_away);
}

static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
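
/* Example: a 256-bit vector is 32 bytes, i.e. nparts = 4, so LDR of a
 * Zreg unrolls into four little-endian 8-byte loads; larger vectors
 * take the pointer-indexed loop instead.
 */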
/* Similarly for stores.  */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype.  */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};

static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
{
    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
}
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc;

    /* For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    desc = sve_memopidx(s, dtype);
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
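/* The resulting descriptor layout: simd_desc() packs the two vector
 * lengths into the low bits, and its data field holds
 * (zt << MEMOPIDX_SHIFT) | memopidx, so the helper can recover both
 * the memop+mmu-index pair and the destination register number from
 * the single 32-bit descriptor.
 */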
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
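/* E.g. an LD2H decodes to dtype 5 (MO_UW) with nreg 1, selecting
 * gen_helper_sve_ld2hh_le_r or ..._be_r from the table above; the
 * NULL entries correspond to combinations such as a multi-register
 * extending load, which the encoding cannot produce.
 */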
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
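/* Example: LD1W { z0.s }, p0/z, [x1, #3, MUL VL] with VL = 128 bits:
 * elements == 16 >> 2 == 4, so the computed byte offset is
 * (3 * 4 * 1) << 2 == 48, i.e. three whole vectors past x1.
 */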
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_le_r,
          gen_helper_sve_ldff1hh_le_r,
          gen_helper_sve_ldff1hsu_le_r,
          gen_helper_sve_ldff1hdu_le_r,

          gen_helper_sve_ldff1hds_le_r,
          gen_helper_sve_ldff1hss_le_r,
          gen_helper_sve_ldff1ss_le_r,
          gen_helper_sve_ldff1sdu_le_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_be_r,
          gen_helper_sve_ldff1hh_be_r,
          gen_helper_sve_ldff1hsu_be_r,
          gen_helper_sve_ldff1hdu_be_r,

          gen_helper_sve_ldff1hds_be_r,
          gen_helper_sve_ldff1hss_be_r,
          gen_helper_sve_ldff1ss_be_r,
          gen_helper_sve_ldff1sdu_be_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_be_r },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_le_r,
          gen_helper_sve_ldnf1hh_le_r,
          gen_helper_sve_ldnf1hsu_le_r,
          gen_helper_sve_ldnf1hdu_le_r,

          gen_helper_sve_ldnf1hds_le_r,
          gen_helper_sve_ldnf1hss_le_r,
          gen_helper_sve_ldnf1ss_le_r,
          gen_helper_sve_ldnf1sdu_le_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_be_r,
          gen_helper_sve_ldnf1hh_be_r,
          gen_helper_sve_ldnf1hsu_be_r,
          gen_helper_sve_ldnf1hdu_be_r,

          gen_helper_sve_ldnf1hds_be_r,
          gen_helper_sve_ldnf1hss_be_r,
          gen_helper_sve_ldnf1ss_be_r,
          gen_helper_sve_ldnf1sdu_be_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_be_r },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
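/* LDFF1 (register + register) and LDNF1 (register + immediate) mirror
 * the contiguous LD1 translators above; the difference is entirely in
 * the helpers, which may fault only on the first active element
 * (LDFF1) or not at all (LDNF1), recording how far the load
 * progressed in the FFR predicate instead of raising an exception.
 */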
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(16, 16, desc);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
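/* Example: for LD1RQD { z0.d }, p0/z, [x0], msz == 3 selects
 * gen_helper_sve_ld1dd_*_r with a descriptor fixed at 16 bytes
 * (simd_desc(16, 16, ...)); the loaded quadword is then copied into
 * each remaining 16-byte lane by tcg_gen_gvec_dup_mem.
 */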
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, msz);
    }
    return true;
}
static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
    }
    return true;
}
/* Load and broadcast element.  */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
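/* E.g. LD1RW { z0.s }, p0/z, [x1, #8] performs one 32-bit load from
 * x1 + 8 (a->imm == 2 scaled by msz), broadcasts it to every .s
 * element, then zeroes the elements where p0 is false; if p0 is
 * entirely false, the branch to `over` skips the load altogether.
 */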
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_le_r,
            gen_helper_sve_st1hs_le_r,
            gen_helper_sve_st1hd_le_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_le_r,
            gen_helper_sve_st1sd_le_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_le_r } },
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_be_r,
            gen_helper_sve_st1hs_be_r,
            gen_helper_sve_st1hd_be_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_be_r,
            gen_helper_sve_st1sd_be_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_be_r } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_le_r,
            gen_helper_sve_st2ss_le_r,
            gen_helper_sve_st2dd_le_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_le_r,
            gen_helper_sve_st3ss_le_r,
            gen_helper_sve_st3dd_le_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_le_r,
            gen_helper_sve_st4ss_le_r,
            gen_helper_sve_st4dd_le_r } },
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_be_r,
            gen_helper_sve_st2ss_be_r,
            gen_helper_sve_st2dd_be_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_be_r,
            gen_helper_sve_st3ss_be_r,
            gen_helper_sve_st3dd_be_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_be_r,
            gen_helper_sve_st4ss_be_r,
            gen_helper_sve_st4dd_be_r } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[be][msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
}
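/* The fn_single table is upper-triangular: msz <= esz always holds
 * (a store can truncate, never extend), so e.g. ST1B of .d elements
 * uses fn_single[be][0][3], while the entries below the diagonal
 * stay NULL and are rejected by the callers' msz > esz checks.
 */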
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}
static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}
/*
 *** SVE gather loads / scatter stores
 */

static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= scale << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
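/* Compare do_mem_zpa above: here the vector and predicate registers
 * are all passed as host pointers, so the spare data bits of the
 * descriptor carry the offset scale (0, or log2 of the memory element
 * size for the scaled addressing forms) rather than a register
 * number.
 */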
/* Indexed by [be][ff][xs][u][msz].  */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
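/* Illustration of the xs overload: LD1D { z0.d }, p0/z, [x0, z1.d]
 * (plain 64-bit offsets) is reached with xs == 2, u == 1, msz == 3,
 * giving gen_helper_sve_lddd_le_zd or ..._be_zd; xs == 0/1 cover the
 * unpacked 32-bit offsets, zero- or sign-extended (zsu/zss).
 */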
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;

    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}
/* Indexed by [be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    int be = s->be_data == MO_BE;

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}
/*
 *** SVE Memory - Prefetch Group
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}

static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}

static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
    }
    return true;
}