/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
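/* A worked example of the tszimm decodes above (values chosen here only
 * for illustration): a shift-right immediate with tsz:imm3 = 0b0100101
 * (x = 37) discards imm3 to leave tsz = 0b0100, whose most significant
 * set bit gives esz = 2 (32-bit elements); tszimm_shr then returns
 * (16 << 2) - 37 = 27, a valid shift count in [1, 32].  Likewise
 * expand_imm_sh8s(0x1ff) sign-extends the low byte to -1 and shifts it
 * by 8 because the SH bit is set, producing -256.
 */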
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
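/* For reference (summarizing the ARM PredTest pseudocode, not spelled out
 * in this file): the helpers pack the flags so that do_pred_flags can
 * expand them -- the sign bit of the returned value supplies N (first
 * active element true), bit 1 is nonzero when some active element was
 * true (clearing Z), and bit 0 supplies C, which PredTest defines from
 * the inverse of the last active element; V is always zero.
 */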
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
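/* E.g. for esz == 2 (.S elements) only one predicate bit in every four is
 * significant, hence the 0x1111... mask: a predicate word governing a
 * 256-bit vector holds its 8 word-sized elements in the low 32 bits, and
 * ANDing with pred_esz_masks[2] keeps exactly those 8 element bits.
 */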
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
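/* That is, for each element i, Zd[i] = Pg[i] ? Zn[i] : Zm[i]; e.g. an
 * alternating predicate picks the even-numbered elements from Zn and the
 * odd-numbered elements from Zm.
 */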
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(NOT_zpz, not_zpz)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements.  */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
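/* ASRD rounds the shift toward zero, as a division would: e.g. for a .B
 * element holding -7 and a shift of 2, the helper produces -1 (-7 / 4
 * truncated), where a plain arithmetic shift right would give -2.
 */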
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}
/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
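/* INDEX fills Zd with an arithmetic progression, so element i holds
 * start + i * incr; e.g. INDEX Zd.S, #1, #2 writes 1, 3, 5, ... into
 * successive .S elements.
 */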
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
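/* A worked example (values chosen here for illustration): with a 256-bit
 * vector (fullsz = 32) and esz = 2 there are 8 elements, so POW2 yields 8,
 * VL16 yields 0 (since 8 < 16), MUL3 yields 6, and ALL yields 8.
 */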
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
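/* E.g. PTRUE Pd.S, VL4 at VL = 256 bits: numelem = 4, so setsz = 16
 * predicate bits and word = pred_esz_masks[2]; lastword masks that down
 * to 0x1111, which lands in the first two predicate bytes while the
 * stored word zeroes the rest.
 */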
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
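/* E.g. an unsigned increment of reg = 0xfffffffe by val = 5 first
 * zero-extends reg, computes 0x1_0000_0003 in 64 bits, and the movcond
 * then clamps the result to the UINT32_MAX bound.
 */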
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
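/* The signed cases use the usual xor trick: for the addition, overflow
 * occurred iff the operands had the same sign and the result differs
 * from them, i.e. (reg ^ val) is clear and (result ^ val) is set in the
 * sign bit, so t0 = t1 & ~t0 is negative exactly on overflow.  Because
 * val is known positive, only the INT64_MAX (addition) or INT64_MIN
 * (subtraction) bound can ever be hit.
 */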
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
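/* E.g. with a 16-byte vector and imm = 3, the result takes bytes 3..15
 * of Zn followed by bytes 0..2 of Zm -- EXT behaves as a byte rotation
 * of the concatenated register pair.
 */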
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
/*
 *** SVE Permute Vector - Predicated Group
 */

static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
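/* Note for both wrapping helpers above: SVE permits any vector length
 * that is a multiple of 128 bits, so vsz need not be a power of 2.
 * E.g. for a 384-bit VL (vsz = 48) the wrap cannot be expressed as an
 * AND mask, hence the movcond-based compare-and-select fallback.
 */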
/* Load an unsigned element of ESZ from BASE+OFS. */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
/* Load an unsigned element of ESZ from RM[LAST]. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
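/* For example, with esz = 0 the big-endian adjustment above xors the
 * index with 7 (8 - (1 << 0)), flipping the byte position within each
 * 64-bit storage unit; esz = 3 needs no adjustment, which is why the
 * xor is guarded with esz < 3.
 */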
/* Compute CLAST for a Zreg. */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
/* Compute CLAST for a scalar. */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
/* Compute CLAST for a Vreg. */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
/* Compute CLAST for a Xreg. */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
/* Compute LAST for a scalar. */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}
/*
 *** SVE Integer Compare - Vectors Group
 */

static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
/*
 *** SVE Integer Compare - Immediate Groups
 */

static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
/*
 *** SVE Partition Break Group
 */

static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}

static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
/*
 *** SVE Predicate Count Group
 */

static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
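/* A worked example for the inline (psz <= 8) path above: with esz = 2,
 * pred_esz_masks[2] = 0x1111111111111111ull keeps one predicate bit per
 * 4-byte element; ANDing with MAKE_64BIT_MASK(0, psz * 8) then discards
 * bits beyond the real predicate size, so the final ctpop yields the
 * number of active word elements directly.
 */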
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}

static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}
*s
, arg_incdec2_pred
*a
)
3084 if (sve_access_check(s
)) {
3085 unsigned vsz
= vec_full_reg_size(s
);
3086 TCGv_i64 val
= tcg_temp_new_i64();
3087 GVecGen2sFn
*gvec_fn
= a
->d
? tcg_gen_gvec_subs
: tcg_gen_gvec_adds
;
3089 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3090 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3091 vec_full_reg_offset(s
, a
->rn
), val
, vsz
, vsz
);
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
/*
 *** SVE Integer Compare Scalars Group
 */

static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    tcg_gen_sub_i64(t0, op1, op0);

    tmax = tcg_const_i64(vsz >> a->esz);
    if (a->eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /* If op1 is max (un)signed integer (and the only time the addition
         * above could overflow), then we produce an all-true predicate by
         * setting the count to the vector length.  This is because the
         * pseudocode is described as an increment + compare loop, and the
         * max integer would always compare true.
         */
        tcg_gen_movi_i64(t1, (a->sf
                              ? (a->u ? UINT64_MAX : INT64_MAX)
                              : (a->u ? UINT32_MAX : INT32_MAX)));
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false. */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
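/* A worked example of the count computation above: WHILELT with
 * op0 = 1, op1 = 4 yields t0 = op1 - op0 = 3 true iterations, and
 * WHILELE one more.  The count is clamped to vsz >> esz elements,
 * forced to zero when the condition fails outright, and scaled to a
 * bit count before gen_helper_sve_while materializes the predicate.
 */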
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate. */
        imm = vfp_expand_imm(a->esz, a->imm);
        imm = dup_const(a->esz, imm);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
    }
    return true;
}

static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
    }
    return true;
}
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
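/* The .scalar_first markers in the table above ask the gvec expander to
 * pass the scalar as the first source operand, so each sub computes
 * imm - Zn (the reversed subtract that SUBR requires) rather than
 * Zn - imm.
 */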
static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_const_i64(a->imm);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);

        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            c, vsz, vsz, 0, fn);
        tcg_temp_free_i64(c);
    }
    return true;
}

#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
{
    static gen_helper_gvec_3 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->u][a->sz]);
    }
    return true;
}

static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
{
    static gen_helper_gvec_3 * const fns[2][2] = {
        { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
        { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->index, fns[a->u][a->sz]);
    }
    return true;
}
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | a->sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Multiply Indexed Group
 */

static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = get_fpstatus_ptr(a->esz == MO_16);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
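/* Note that the maxsz passed to simd_desc above is pow2ceil(vsz), not
 * vsz: the reduction helpers work pairwise over a power-of-2 number of
 * elements, padding the tail with the operation's identity value, so
 * e.g. a 384-bit vector is described with a 512-bit maximum.
 */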
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_fp_reduce * const fns[3] = {                \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_reduce(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}

static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frecpe_h,
        gen_helper_gvec_frecpe_s,
        gen_helper_gvec_frecpe_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}

static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frsqrte_h,
        gen_helper_gvec_frsqrte_s,
        gen_helper_gvec_frsqrte_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}
3615 static void do_ppz_fp(DisasContext
*s
, arg_rpr_esz
*a
,
3616 gen_helper_gvec_3_ptr
*fn
)
3618 unsigned vsz
= vec_full_reg_size(s
);
3619 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3621 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s
, a
->rd
),
3622 vec_full_reg_offset(s
, a
->rn
),
3623 pred_full_reg_offset(s
, a
->pg
),
3624 status
, vsz
, vsz
, 0, fn
);
3625 tcg_temp_free_ptr(status
);
3628 #define DO_PPZ(NAME, name) \
3629 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3631 static gen_helper_gvec_3_ptr * const fns[3] = { \
3632 gen_helper_sve_##name##_h, \
3633 gen_helper_sve_##name##_s, \
3634 gen_helper_sve_##name##_d, \
3636 if (a->esz == 0) { \
3639 if (sve_access_check(s)) { \
3640 do_ppz_fp(s, a, fns[a->esz - 1]); \
3645 DO_PPZ(FCMGE_ppz0
, fcmge0
)
3646 DO_PPZ(FCMGT_ppz0
, fcmgt0
)
3647 DO_PPZ(FCMLE_ppz0
, fcmle0
)
3648 DO_PPZ(FCMLT_ppz0
, fcmlt0
)
3649 DO_PPZ(FCMEQ_ppz0
, fcmeq0
)
3650 DO_PPZ(FCMNE_ppz0
, fcmne0
)
/*
 *** SVE floating-point trig multiply-add coefficient
 */

static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Accumulating Reduction Group
 */

static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                       gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = get_fpstatus_ptr(is_fp16);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}

static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    TCGv_i64 temp = tcg_const_i64(imm);
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
    tcg_temp_free_i64(temp);
}
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

#define float16_two  make_float16(0x4000)
#define float32_two  make_float32(0x40000000)
#define float64_two  make_float64(0x4000000000000000ULL)

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
                      gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)   \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fp_cmp(s, a, fns[a->esz]);                            \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);

static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned desc;
    TCGv_i32 t_desc;
    TCGv_ptr pg = tcg_temp_new_ptr();

    /* We would need 7 operands to pass these arguments "properly".
     * So we encode all the register numbers into the descriptor.
     */
    desc = deposit32(a->rd, 5, 5, a->rn);
    desc = deposit32(desc, 10, 5, a->rm);
    desc = deposit32(desc, 15, 5, a->ra);
    desc = simd_desc(vsz, vsz, desc);

    t_desc = tcg_const_i32(desc);
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(cpu_env, pg, t_desc);
    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(pg);
    return true;
}
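/* Layout of the register numbers packed into the descriptor above,
 * within the simd_desc data field: rd in bits [4:0], rn in [9:5],
 * rm in [14:10], ra in [19:15].  The helper unpacks them from the
 * descriptor rather than taking seven operands.
 */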
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)         \
{                                                                   \
    static gen_helper_sve_fmla * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fmla(s, a, fns[a->esz]);                              \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor.
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(is_fp16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}

static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}

static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_up);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_down);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
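/* With the unrolling threshold above, e.g. a 256-bit vector (len = 32,
 * nparts = 4) is loaded as four straight-line 8-byte loads, while larger
 * vector lengths use the TCG-level loop to bound code expansion.  The
 * len_remain tail only arises for predicate registers, whose size is
 * merely a multiple of 2 bytes.
 */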
/* Similarly for stores. */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
4566 *** SVE Memory - Contiguous Load Group
4569 /* The memory mode of the dtype. */
4570 static const MemOp dtype_mop
[16] = {
4571 MO_UB
, MO_UB
, MO_UB
, MO_UB
,
4572 MO_SL
, MO_UW
, MO_UW
, MO_UW
,
4573 MO_SW
, MO_SW
, MO_UL
, MO_UL
,
4574 MO_SB
, MO_SB
, MO_SB
, MO_Q
4577 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4579 /* The vector element size of dtype. */
4580 static const uint8_t dtype_esz
[16] = {
static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
{
    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
}
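/* The TCGMemOpIdx built here is packed into the low bits of the
 * simd_desc data field; do_mem_zpa and do_mem_zpz below shift further
 * values (register number or offset scale) in above MEMOPIDX_SHIFT.
 */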
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc;

    /* For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    desc = sve_memopidx(s, dtype);
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
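/* As an example of the indexing above, fns[0][0][1] is
 * gen_helper_sve_ld2bb_r: little-endian, dtype 0 (byte elements),
 * two registers.
 */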
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
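/* For example, LD1D (dtype 15) with a 256-bit vector has
 * elements = 32 >> 3 = 4, so each step of a->imm advances the
 * address by 4 << 3 = 32 bytes, i.e. one whole vector.
 */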
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_le_r,
          gen_helper_sve_ldff1hh_le_r,
          gen_helper_sve_ldff1hsu_le_r,
          gen_helper_sve_ldff1hdu_le_r,

          gen_helper_sve_ldff1hds_le_r,
          gen_helper_sve_ldff1hss_le_r,
          gen_helper_sve_ldff1ss_le_r,
          gen_helper_sve_ldff1sdu_le_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_be_r,
          gen_helper_sve_ldff1hh_be_r,
          gen_helper_sve_ldff1hsu_be_r,
          gen_helper_sve_ldff1hdu_be_r,

          gen_helper_sve_ldff1hds_be_r,
          gen_helper_sve_ldff1hss_be_r,
          gen_helper_sve_ldff1ss_be_r,
          gen_helper_sve_ldff1sdu_be_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_be_r },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_le_r,
          gen_helper_sve_ldnf1hh_le_r,
          gen_helper_sve_ldnf1hsu_le_r,
          gen_helper_sve_ldnf1hdu_le_r,

          gen_helper_sve_ldnf1hds_le_r,
          gen_helper_sve_ldnf1hss_le_r,
          gen_helper_sve_ldnf1ss_le_r,
          gen_helper_sve_ldnf1sdu_le_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_be_r,
          gen_helper_sve_ldnf1hh_be_r,
          gen_helper_sve_ldnf1hsu_be_r,
          gen_helper_sve_ldnf1hdu_be_r,

          gen_helper_sve_ldnf1hds_be_r,
          gen_helper_sve_ldnf1hss_be_r,
          gen_helper_sve_ldnf1ss_be_r,
          gen_helper_sve_ldnf1sdu_be_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_be_r },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
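/* Note that the addressing forms mirror the architecture: first-fault
 * (LDFF1) uses the scalar-plus-scalar form above, while non-fault
 * (LDNF1) is scalar-plus-immediate only.
 */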
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers. */
    desc = sve_memopidx(s, msz_dtype(s, msz));
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(16, 16, desc);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering the assert that checks that no bits are
         * set within a predicate beyond VQ, since we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
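/* In do_ldrq above, vece 4 passed to tcg_gen_gvec_dup_mem selects a
 * 16-byte (quadword) duplication unit, so the quadword just loaded at
 * dofs is copied across the remaining vsz - 16 bytes of the register.
 */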
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, msz);
    }
    return true;
}
static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
    }
    return true;
}
/* Load and broadcast element. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);
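    /* Whether or not the load above was skipped via the branch to
     * 'over', the inactive elements must still be zeroed, so both
     * paths fall through to the code below.
     */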
    /* Zero the inactive elements. */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
        /* Little-endian */
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_le_r,
            gen_helper_sve_st1hs_le_r,
            gen_helper_sve_st1hd_le_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_le_r,
            gen_helper_sve_st1sd_le_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_le_r } },
        /* Big-endian */
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_be_r,
            gen_helper_sve_st1hs_be_r,
            gen_helper_sve_st1hd_be_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_be_r,
            gen_helper_sve_st1sd_be_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_be_r } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
        /* Little-endian */
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_le_r,
            gen_helper_sve_st2ss_le_r,
            gen_helper_sve_st2dd_le_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_le_r,
            gen_helper_sve_st3ss_le_r,
            gen_helper_sve_st3dd_le_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_le_r,
            gen_helper_sve_st4ss_le_r,
            gen_helper_sve_st4dd_le_r } },
        /* Big-endian */
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_be_r,
            gen_helper_sve_st2ss_be_r,
            gen_helper_sve_st2dd_be_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_be_r,
            gen_helper_sve_st3ss_be_r,
            gen_helper_sve_st3dd_be_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_be_r,
            gen_helper_sve_st4ss_be_r,
            gen_helper_sve_st4dd_be_r } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[be][msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
}
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}
static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}
/*
 *** SVE gather loads / scatter stores
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = sve_memopidx(s, msz_dtype(s, msz));
    desc |= scale << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
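/* Unlike do_mem_zpa, the scatter/gather helpers receive all of the
 * registers involved as pointers, so the descriptor's data field
 * carries the offset scale rather than a register number.
 */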
/* Indexed by [be][ff][xs][u][msz]. */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
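/* As an example of the indexing, gather_load_fn64[0][0][2][1][3] is
 * gen_helper_sve_lddd_le_zd: little-endian, no first-fault, 64-bit
 * vector offsets, unsigned, 8-byte accesses.
 */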
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;

    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}
/* Indexed by [be][xs][msz]. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    int be = s->be_data == MO_BE;

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}
/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}

static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}

static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
    }
    return true;
}