/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64
, uint32_t, uint32_t);
41 typedef void gen_helper_gvec_flags_3(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
43 typedef void gen_helper_gvec_flags_4(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
44 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
46 typedef void gen_helper_gvec_mem(TCGv_env
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env
, TCGv_ptr
, TCGv_ptr
,
48 TCGv_ptr
, TCGv_i64
, TCGv_i32
);
51 * Helpers for extracting complex instruction fields.
54 /* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
57 static int tszimm_esz(int x
)
59 x
>>= 3; /* discard imm3 */
63 static int tszimm_shr(int x
)
65 return (16 << tszimm_esz(x
)) - x
;
68 /* See e.g. LSL (immediate, predicated). */
69 static int tszimm_shl(int x
)
71 return x
- (8 << tszimm_esz(x
));
74 static inline int plus1(int x
)
79 /* The SH bit is in bit 8. Extract the low 8 and shift. */
80 static inline int expand_imm_sh8s(int x
)
82 return (int8_t)x
<< (x
& 0x100 ? 8 : 0);
85 static inline int expand_imm_sh8u(int x
)
87 return (uint8_t)x
<< (x
& 0x100 ? 8 : 0);
90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
93 static inline int msz_dtype(int msz
)
95 static const uint8_t dtype
[4] = { 0, 5, 10, 15 };
/*
 *** Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 *** Implement all of the translator functions referenced by the decoder.
 */
109 /* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
112 static inline int pred_full_reg_offset(DisasContext
*s
, int regno
)
114 return offsetof(CPUARMState
, vfp
.pregs
[regno
]);
117 /* Return the byte size of the whole predicate register, VL / 64. */
118 static inline int pred_full_reg_size(DisasContext
*s
)
120 return s
->sve_len
>> 3;
123 /* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them the same way.
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
131 static int size_for_gvec(int size
)
136 return QEMU_ALIGN_UP(size
, 16);
140 static int pred_gvec_reg_size(DisasContext
*s
)
142 return size_for_gvec(pred_full_reg_size(s
));
145 /* Invoke a vector expander on two Zregs. */
146 static bool do_vector2_z(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
147 int esz
, int rd
, int rn
)
149 if (sve_access_check(s
)) {
150 unsigned vsz
= vec_full_reg_size(s
);
151 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
152 vec_full_reg_offset(s
, rn
), vsz
, vsz
);
157 /* Invoke a vector expander on three Zregs. */
158 static bool do_vector3_z(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
159 int esz
, int rd
, int rn
, int rm
)
161 if (sve_access_check(s
)) {
162 unsigned vsz
= vec_full_reg_size(s
);
163 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
164 vec_full_reg_offset(s
, rn
),
165 vec_full_reg_offset(s
, rm
), vsz
, vsz
);
170 /* Invoke a vector move on two Zregs. */
171 static bool do_mov_z(DisasContext
*s
, int rd
, int rn
)
173 return do_vector2_z(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
176 /* Initialize a Zreg with replications of a 64-bit immediate. */
177 static void do_dupi_z(DisasContext
*s
, int rd
, uint64_t word
)
179 unsigned vsz
= vec_full_reg_size(s
);
180 tcg_gen_gvec_dup64i(vec_full_reg_offset(s
, rd
), vsz
, vsz
, word
);
183 /* Invoke a vector expander on two Pregs. */
184 static bool do_vector2_p(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
185 int esz
, int rd
, int rn
)
187 if (sve_access_check(s
)) {
188 unsigned psz
= pred_gvec_reg_size(s
);
189 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
190 pred_full_reg_offset(s
, rn
), psz
, psz
);
195 /* Invoke a vector expander on three Pregs. */
196 static bool do_vector3_p(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
197 int esz
, int rd
, int rn
, int rm
)
199 if (sve_access_check(s
)) {
200 unsigned psz
= pred_gvec_reg_size(s
);
201 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
202 pred_full_reg_offset(s
, rn
),
203 pred_full_reg_offset(s
, rm
), psz
, psz
);
208 /* Invoke a vector operation on four Pregs. */
209 static bool do_vecop4_p(DisasContext
*s
, const GVecGen4
*gvec_op
,
210 int rd
, int rn
, int rm
, int rg
)
212 if (sve_access_check(s
)) {
213 unsigned psz
= pred_gvec_reg_size(s
);
214 tcg_gen_gvec_4(pred_full_reg_offset(s
, rd
),
215 pred_full_reg_offset(s
, rn
),
216 pred_full_reg_offset(s
, rm
),
217 pred_full_reg_offset(s
, rg
),
223 /* Invoke a vector move on two Pregs. */
224 static bool do_mov_p(DisasContext
*s
, int rd
, int rn
)
226 return do_vector2_p(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
229 /* Set the cpu flags as per a return from an SVE helper. */
230 static void do_pred_flags(TCGv_i32 t
)
232 tcg_gen_mov_i32(cpu_NF
, t
);
233 tcg_gen_andi_i32(cpu_ZF
, t
, 2);
234 tcg_gen_andi_i32(cpu_CF
, t
, 1);
235 tcg_gen_movi_i32(cpu_VF
, 0);
238 /* Subroutines computing the ARM PredTest psuedofunction. */
239 static void do_predtest1(TCGv_i64 d
, TCGv_i64 g
)
241 TCGv_i32 t
= tcg_temp_new_i32();
243 gen_helper_sve_predtest1(t
, d
, g
);
245 tcg_temp_free_i32(t
);
248 static void do_predtest(DisasContext
*s
, int dofs
, int gofs
, int words
)
250 TCGv_ptr dptr
= tcg_temp_new_ptr();
251 TCGv_ptr gptr
= tcg_temp_new_ptr();
254 tcg_gen_addi_ptr(dptr
, cpu_env
, dofs
);
255 tcg_gen_addi_ptr(gptr
, cpu_env
, gofs
);
256 t
= tcg_const_i32(words
);
258 gen_helper_sve_predtest(t
, dptr
, gptr
, t
);
259 tcg_temp_free_ptr(dptr
);
260 tcg_temp_free_ptr(gptr
);
263 tcg_temp_free_i32(t
);
266 /* For each element size, the bits within a predicate word that are active. */
267 const uint64_t pred_esz_masks
[4] = {
268 0xffffffffffffffffull
, 0x5555555555555555ull
,
269 0x1111111111111111ull
, 0x0101010101010101ull
273 *** SVE Logical - Unpredicated Group
276 static bool trans_AND_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
278 return do_vector3_z(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
281 static bool trans_ORR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
283 if (a
->rn
== a
->rm
) { /* MOV */
284 return do_mov_z(s
, a
->rd
, a
->rn
);
286 return do_vector3_z(s
, tcg_gen_gvec_or
, 0, a
->rd
, a
->rn
, a
->rm
);
290 static bool trans_EOR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
292 return do_vector3_z(s
, tcg_gen_gvec_xor
, 0, a
->rd
, a
->rn
, a
->rm
);
295 static bool trans_BIC_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
297 return do_vector3_z(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
301 *** SVE Integer Arithmetic - Unpredicated Group
304 static bool trans_ADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
306 return do_vector3_z(s
, tcg_gen_gvec_add
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
309 static bool trans_SUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
311 return do_vector3_z(s
, tcg_gen_gvec_sub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
314 static bool trans_SQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
316 return do_vector3_z(s
, tcg_gen_gvec_ssadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
319 static bool trans_SQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
321 return do_vector3_z(s
, tcg_gen_gvec_sssub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
324 static bool trans_UQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
326 return do_vector3_z(s
, tcg_gen_gvec_usadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
329 static bool trans_UQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
331 return do_vector3_z(s
, tcg_gen_gvec_ussub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
335 *** SVE Integer Arithmetic - Binary Predicated Group
338 static bool do_zpzz_ool(DisasContext
*s
, arg_rprr_esz
*a
, gen_helper_gvec_4
*fn
)
340 unsigned vsz
= vec_full_reg_size(s
);
344 if (sve_access_check(s
)) {
345 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
346 vec_full_reg_offset(s
, a
->rn
),
347 vec_full_reg_offset(s
, a
->rm
),
348 pred_full_reg_offset(s
, a
->pg
),
354 /* Select active elememnts from Zn and inactive elements from Zm,
355 * storing the result in Zd.
357 static void do_sel_z(DisasContext
*s
, int rd
, int rn
, int rm
, int pg
, int esz
)
359 static gen_helper_gvec_4
* const fns
[4] = {
360 gen_helper_sve_sel_zpzz_b
, gen_helper_sve_sel_zpzz_h
,
361 gen_helper_sve_sel_zpzz_s
, gen_helper_sve_sel_zpzz_d
363 unsigned vsz
= vec_full_reg_size(s
);
364 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, rd
),
365 vec_full_reg_offset(s
, rn
),
366 vec_full_reg_offset(s
, rm
),
367 pred_full_reg_offset(s
, pg
),
368 vsz
, vsz
, 0, fns
[esz
]);
371 #define DO_ZPZZ(NAME, name) \
372 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
375 static gen_helper_gvec_4 * const fns[4] = { \
376 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
377 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
379 return do_zpzz_ool(s, a, fns[a->esz]); \
398 DO_ZPZZ(SMULH
, smulh
)
399 DO_ZPZZ(UMULH
, umulh
)
405 static bool trans_SDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
407 static gen_helper_gvec_4
* const fns
[4] = {
408 NULL
, NULL
, gen_helper_sve_sdiv_zpzz_s
, gen_helper_sve_sdiv_zpzz_d
410 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
413 static bool trans_UDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
415 static gen_helper_gvec_4
* const fns
[4] = {
416 NULL
, NULL
, gen_helper_sve_udiv_zpzz_s
, gen_helper_sve_udiv_zpzz_d
418 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
421 static bool trans_SEL_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
423 if (sve_access_check(s
)) {
424 do_sel_z(s
, a
->rd
, a
->rn
, a
->rm
, a
->pg
, a
->esz
);
432 *** SVE Integer Arithmetic - Unary Predicated Group
435 static bool do_zpz_ool(DisasContext
*s
, arg_rpr_esz
*a
, gen_helper_gvec_3
*fn
)
440 if (sve_access_check(s
)) {
441 unsigned vsz
= vec_full_reg_size(s
);
442 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
443 vec_full_reg_offset(s
, a
->rn
),
444 pred_full_reg_offset(s
, a
->pg
),
450 #define DO_ZPZ(NAME, name) \
451 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
453 static gen_helper_gvec_3 * const fns[4] = { \
454 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
455 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
457 return do_zpz_ool(s, a, fns[a->esz]); \
462 DO_ZPZ(CNT_zpz
, cnt_zpz
)
464 DO_ZPZ(NOT_zpz
, not_zpz
)
468 static bool trans_FABS(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
470 static gen_helper_gvec_3
* const fns
[4] = {
472 gen_helper_sve_fabs_h
,
473 gen_helper_sve_fabs_s
,
474 gen_helper_sve_fabs_d
476 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
479 static bool trans_FNEG(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
481 static gen_helper_gvec_3
* const fns
[4] = {
483 gen_helper_sve_fneg_h
,
484 gen_helper_sve_fneg_s
,
485 gen_helper_sve_fneg_d
487 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
490 static bool trans_SXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
492 static gen_helper_gvec_3
* const fns
[4] = {
494 gen_helper_sve_sxtb_h
,
495 gen_helper_sve_sxtb_s
,
496 gen_helper_sve_sxtb_d
498 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
501 static bool trans_UXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
503 static gen_helper_gvec_3
* const fns
[4] = {
505 gen_helper_sve_uxtb_h
,
506 gen_helper_sve_uxtb_s
,
507 gen_helper_sve_uxtb_d
509 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
512 static bool trans_SXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
514 static gen_helper_gvec_3
* const fns
[4] = {
516 gen_helper_sve_sxth_s
,
517 gen_helper_sve_sxth_d
519 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
522 static bool trans_UXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
524 static gen_helper_gvec_3
* const fns
[4] = {
526 gen_helper_sve_uxth_s
,
527 gen_helper_sve_uxth_d
529 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
532 static bool trans_SXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
534 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_sxtw_d
: NULL
);
537 static bool trans_UXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
539 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_uxtw_d
: NULL
);
545 *** SVE Integer Reduction Group
548 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
549 static bool do_vpz_ool(DisasContext
*s
, arg_rpr_esz
*a
,
550 gen_helper_gvec_reduc
*fn
)
552 unsigned vsz
= vec_full_reg_size(s
);
560 if (!sve_access_check(s
)) {
564 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
565 temp
= tcg_temp_new_i64();
566 t_zn
= tcg_temp_new_ptr();
567 t_pg
= tcg_temp_new_ptr();
569 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
570 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
571 fn(temp
, t_zn
, t_pg
, desc
);
572 tcg_temp_free_ptr(t_zn
);
573 tcg_temp_free_ptr(t_pg
);
574 tcg_temp_free_i32(desc
);
576 write_fp_dreg(s
, a
->rd
, temp
);
577 tcg_temp_free_i64(temp
);
581 #define DO_VPZ(NAME, name) \
582 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
584 static gen_helper_gvec_reduc * const fns[4] = { \
585 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
586 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
588 return do_vpz_ool(s, a, fns[a->esz]); \
601 static bool trans_SADDV(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
603 static gen_helper_gvec_reduc
* const fns
[4] = {
604 gen_helper_sve_saddv_b
, gen_helper_sve_saddv_h
,
605 gen_helper_sve_saddv_s
, NULL
607 return do_vpz_ool(s
, a
, fns
[a
->esz
]);
613 *** SVE Shift by Immediate - Predicated Group
616 /* Store zero into every active element of Zd. We will use this for two
617 * and three-operand predicated instructions for which logic dictates a
620 static bool do_clr_zp(DisasContext
*s
, int rd
, int pg
, int esz
)
622 static gen_helper_gvec_2
* const fns
[4] = {
623 gen_helper_sve_clr_b
, gen_helper_sve_clr_h
,
624 gen_helper_sve_clr_s
, gen_helper_sve_clr_d
,
626 if (sve_access_check(s
)) {
627 unsigned vsz
= vec_full_reg_size(s
);
628 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
629 pred_full_reg_offset(s
, pg
),
630 vsz
, vsz
, 0, fns
[esz
]);
635 /* Copy Zn into Zd, storing zeros into inactive elements. */
636 static void do_movz_zpz(DisasContext
*s
, int rd
, int rn
, int pg
, int esz
)
638 static gen_helper_gvec_3
* const fns
[4] = {
639 gen_helper_sve_movz_b
, gen_helper_sve_movz_h
,
640 gen_helper_sve_movz_s
, gen_helper_sve_movz_d
,
642 unsigned vsz
= vec_full_reg_size(s
);
643 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, rd
),
644 vec_full_reg_offset(s
, rn
),
645 pred_full_reg_offset(s
, pg
),
646 vsz
, vsz
, 0, fns
[esz
]);
649 static bool do_zpzi_ool(DisasContext
*s
, arg_rpri_esz
*a
,
650 gen_helper_gvec_3
*fn
)
652 if (sve_access_check(s
)) {
653 unsigned vsz
= vec_full_reg_size(s
);
654 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
655 vec_full_reg_offset(s
, a
->rn
),
656 pred_full_reg_offset(s
, a
->pg
),
657 vsz
, vsz
, a
->imm
, fn
);
662 static bool trans_ASR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
664 static gen_helper_gvec_3
* const fns
[4] = {
665 gen_helper_sve_asr_zpzi_b
, gen_helper_sve_asr_zpzi_h
,
666 gen_helper_sve_asr_zpzi_s
, gen_helper_sve_asr_zpzi_d
,
669 /* Invalid tsz encoding -- see tszimm_esz. */
672 /* Shift by element size is architecturally valid. For
673 arithmetic right-shift, it's the same as by one less. */
674 a
->imm
= MIN(a
->imm
, (8 << a
->esz
) - 1);
675 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
678 static bool trans_LSR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
680 static gen_helper_gvec_3
* const fns
[4] = {
681 gen_helper_sve_lsr_zpzi_b
, gen_helper_sve_lsr_zpzi_h
,
682 gen_helper_sve_lsr_zpzi_s
, gen_helper_sve_lsr_zpzi_d
,
687 /* Shift by element size is architecturally valid.
688 For logical shifts, it is a zeroing operation. */
689 if (a
->imm
>= (8 << a
->esz
)) {
690 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
692 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
696 static bool trans_LSL_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
698 static gen_helper_gvec_3
* const fns
[4] = {
699 gen_helper_sve_lsl_zpzi_b
, gen_helper_sve_lsl_zpzi_h
,
700 gen_helper_sve_lsl_zpzi_s
, gen_helper_sve_lsl_zpzi_d
,
705 /* Shift by element size is architecturally valid.
706 For logical shifts, it is a zeroing operation. */
707 if (a
->imm
>= (8 << a
->esz
)) {
708 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
710 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
714 static bool trans_ASRD(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
716 static gen_helper_gvec_3
* const fns
[4] = {
717 gen_helper_sve_asrd_b
, gen_helper_sve_asrd_h
,
718 gen_helper_sve_asrd_s
, gen_helper_sve_asrd_d
,
723 /* Shift by element size is architecturally valid. For arithmetic
724 right shift for division, it is a zeroing operation. */
725 if (a
->imm
>= (8 << a
->esz
)) {
726 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
728 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/* Shift of narrow elements by a wide (doubleword) shift count;
 * only byte/half/word element sizes are valid.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
757 *** SVE Bitwise Shift - Unpredicated Group
760 static bool do_shift_imm(DisasContext
*s
, arg_rri_esz
*a
, bool asr
,
761 void (*gvec_fn
)(unsigned, uint32_t, uint32_t,
762 int64_t, uint32_t, uint32_t))
765 /* Invalid tsz encoding -- see tszimm_esz. */
768 if (sve_access_check(s
)) {
769 unsigned vsz
= vec_full_reg_size(s
);
770 /* Shift by element size is architecturally valid. For
771 arithmetic right-shift, it's the same as by one less.
772 Otherwise it is a zeroing operation. */
773 if (a
->imm
>= 8 << a
->esz
) {
775 a
->imm
= (8 << a
->esz
) - 1;
777 do_dupi_z(s
, a
->rd
, 0);
781 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
782 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
787 static bool trans_ASR_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
789 return do_shift_imm(s
, a
, true, tcg_gen_gvec_sari
);
792 static bool trans_LSR_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
794 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shri
);
797 static bool trans_LSL_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
799 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shli
);
802 static bool do_zzw_ool(DisasContext
*s
, arg_rrr_esz
*a
, gen_helper_gvec_3
*fn
)
807 if (sve_access_check(s
)) {
808 unsigned vsz
= vec_full_reg_size(s
);
809 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
810 vec_full_reg_offset(s
, a
->rn
),
811 vec_full_reg_offset(s
, a
->rm
),
817 #define DO_ZZW(NAME, name) \
818 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
821 static gen_helper_gvec_3 * const fns[4] = { \
822 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
823 gen_helper_sve_##name##_zzw_s, NULL \
825 return do_zzw_ool(s, a, fns[a->esz]); \
835 *** SVE Integer Multiply-Add Group
838 static bool do_zpzzz_ool(DisasContext
*s
, arg_rprrr_esz
*a
,
839 gen_helper_gvec_5
*fn
)
841 if (sve_access_check(s
)) {
842 unsigned vsz
= vec_full_reg_size(s
);
843 tcg_gen_gvec_5_ool(vec_full_reg_offset(s
, a
->rd
),
844 vec_full_reg_offset(s
, a
->ra
),
845 vec_full_reg_offset(s
, a
->rn
),
846 vec_full_reg_offset(s
, a
->rm
),
847 pred_full_reg_offset(s
, a
->pg
),
853 #define DO_ZPZZZ(NAME, name) \
854 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
856 static gen_helper_gvec_5 * const fns[4] = { \
857 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
858 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
860 return do_zpzzz_ool(s, a, fns[a->esz]); \
869 *** SVE Index Generation Group
872 static void do_index(DisasContext
*s
, int esz
, int rd
,
873 TCGv_i64 start
, TCGv_i64 incr
)
875 unsigned vsz
= vec_full_reg_size(s
);
876 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
877 TCGv_ptr t_zd
= tcg_temp_new_ptr();
879 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
881 gen_helper_sve_index_d(t_zd
, start
, incr
, desc
);
883 typedef void index_fn(TCGv_ptr
, TCGv_i32
, TCGv_i32
, TCGv_i32
);
884 static index_fn
* const fns
[3] = {
885 gen_helper_sve_index_b
,
886 gen_helper_sve_index_h
,
887 gen_helper_sve_index_s
,
889 TCGv_i32 s32
= tcg_temp_new_i32();
890 TCGv_i32 i32
= tcg_temp_new_i32();
892 tcg_gen_extrl_i64_i32(s32
, start
);
893 tcg_gen_extrl_i64_i32(i32
, incr
);
894 fns
[esz
](t_zd
, s32
, i32
, desc
);
896 tcg_temp_free_i32(s32
);
897 tcg_temp_free_i32(i32
);
899 tcg_temp_free_ptr(t_zd
);
900 tcg_temp_free_i32(desc
);
903 static bool trans_INDEX_ii(DisasContext
*s
, arg_INDEX_ii
*a
, uint32_t insn
)
905 if (sve_access_check(s
)) {
906 TCGv_i64 start
= tcg_const_i64(a
->imm1
);
907 TCGv_i64 incr
= tcg_const_i64(a
->imm2
);
908 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
909 tcg_temp_free_i64(start
);
910 tcg_temp_free_i64(incr
);
915 static bool trans_INDEX_ir(DisasContext
*s
, arg_INDEX_ir
*a
, uint32_t insn
)
917 if (sve_access_check(s
)) {
918 TCGv_i64 start
= tcg_const_i64(a
->imm
);
919 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
920 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
921 tcg_temp_free_i64(start
);
926 static bool trans_INDEX_ri(DisasContext
*s
, arg_INDEX_ri
*a
, uint32_t insn
)
928 if (sve_access_check(s
)) {
929 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
930 TCGv_i64 incr
= tcg_const_i64(a
->imm
);
931 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
932 tcg_temp_free_i64(incr
);
937 static bool trans_INDEX_rr(DisasContext
*s
, arg_INDEX_rr
*a
, uint32_t insn
)
939 if (sve_access_check(s
)) {
940 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
941 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
942 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
948 *** SVE Stack Allocation Group
951 static bool trans_ADDVL(DisasContext
*s
, arg_ADDVL
*a
, uint32_t insn
)
953 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
954 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
955 tcg_gen_addi_i64(rd
, rn
, a
->imm
* vec_full_reg_size(s
));
959 static bool trans_ADDPL(DisasContext
*s
, arg_ADDPL
*a
, uint32_t insn
)
961 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
962 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
963 tcg_gen_addi_i64(rd
, rn
, a
->imm
* pred_full_reg_size(s
));
967 static bool trans_RDVL(DisasContext
*s
, arg_RDVL
*a
, uint32_t insn
)
969 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
970 tcg_gen_movi_i64(reg
, a
->imm
* vec_full_reg_size(s
));
975 *** SVE Compute Vector Address Group
978 static bool do_adr(DisasContext
*s
, arg_rrri
*a
, gen_helper_gvec_3
*fn
)
980 if (sve_access_check(s
)) {
981 unsigned vsz
= vec_full_reg_size(s
);
982 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
983 vec_full_reg_offset(s
, a
->rn
),
984 vec_full_reg_offset(s
, a
->rm
),
985 vsz
, vsz
, a
->imm
, fn
);
990 static bool trans_ADR_p32(DisasContext
*s
, arg_rrri
*a
, uint32_t insn
)
992 return do_adr(s
, a
, gen_helper_sve_adr_p32
);
995 static bool trans_ADR_p64(DisasContext
*s
, arg_rrri
*a
, uint32_t insn
)
997 return do_adr(s
, a
, gen_helper_sve_adr_p64
);
1000 static bool trans_ADR_s32(DisasContext
*s
, arg_rrri
*a
, uint32_t insn
)
1002 return do_adr(s
, a
, gen_helper_sve_adr_s32
);
1005 static bool trans_ADR_u32(DisasContext
*s
, arg_rrri
*a
, uint32_t insn
)
1007 return do_adr(s
, a
, gen_helper_sve_adr_u32
);
1011 *** SVE Integer Misc - Unpredicated Group
1014 static bool trans_FEXPA(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1016 static gen_helper_gvec_2
* const fns
[4] = {
1018 gen_helper_sve_fexpa_h
,
1019 gen_helper_sve_fexpa_s
,
1020 gen_helper_sve_fexpa_d
,
1025 if (sve_access_check(s
)) {
1026 unsigned vsz
= vec_full_reg_size(s
);
1027 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
1028 vec_full_reg_offset(s
, a
->rn
),
1029 vsz
, vsz
, 0, fns
[a
->esz
]);
1034 static bool trans_FTSSEL(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
1036 static gen_helper_gvec_3
* const fns
[4] = {
1038 gen_helper_sve_ftssel_h
,
1039 gen_helper_sve_ftssel_s
,
1040 gen_helper_sve_ftssel_d
,
1045 if (sve_access_check(s
)) {
1046 unsigned vsz
= vec_full_reg_size(s
);
1047 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
1048 vec_full_reg_offset(s
, a
->rn
),
1049 vec_full_reg_offset(s
, a
->rm
),
1050 vsz
, vsz
, 0, fns
[a
->esz
]);
1056 *** SVE Predicate Logical Operations Group
1059 static bool do_pppp_flags(DisasContext
*s
, arg_rprr_s
*a
,
1060 const GVecGen4
*gvec_op
)
1062 if (!sve_access_check(s
)) {
1066 unsigned psz
= pred_gvec_reg_size(s
);
1067 int dofs
= pred_full_reg_offset(s
, a
->rd
);
1068 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1069 int mofs
= pred_full_reg_offset(s
, a
->rm
);
1070 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1073 /* Do the operation and the flags generation in temps. */
1074 TCGv_i64 pd
= tcg_temp_new_i64();
1075 TCGv_i64 pn
= tcg_temp_new_i64();
1076 TCGv_i64 pm
= tcg_temp_new_i64();
1077 TCGv_i64 pg
= tcg_temp_new_i64();
1079 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1080 tcg_gen_ld_i64(pm
, cpu_env
, mofs
);
1081 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1083 gvec_op
->fni8(pd
, pn
, pm
, pg
);
1084 tcg_gen_st_i64(pd
, cpu_env
, dofs
);
1086 do_predtest1(pd
, pg
);
1088 tcg_temp_free_i64(pd
);
1089 tcg_temp_free_i64(pn
);
1090 tcg_temp_free_i64(pm
);
1091 tcg_temp_free_i64(pg
);
1093 /* The operation and flags generation is large. The computation
1094 * of the flags depends on the original contents of the guarding
1095 * predicate. If the destination overwrites the guarding predicate,
1096 * then the easiest way to get this right is to save a copy.
1099 if (a
->rd
== a
->pg
) {
1100 tofs
= offsetof(CPUARMState
, vfp
.preg_tmp
);
1101 tcg_gen_gvec_mov(0, tofs
, gofs
, psz
, psz
);
1104 tcg_gen_gvec_4(dofs
, nofs
, mofs
, gofs
, psz
, psz
, gvec_op
);
1105 do_predtest(s
, dofs
, tofs
, psz
/ 8);
1110 static void gen_and_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1112 tcg_gen_and_i64(pd
, pn
, pm
);
1113 tcg_gen_and_i64(pd
, pd
, pg
);
1116 static void gen_and_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1117 TCGv_vec pm
, TCGv_vec pg
)
1119 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1120 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1123 static bool trans_AND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1125 static const GVecGen4 op
= {
1126 .fni8
= gen_and_pg_i64
,
1127 .fniv
= gen_and_pg_vec
,
1128 .fno
= gen_helper_sve_and_pppp
,
1129 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1132 return do_pppp_flags(s
, a
, &op
);
1133 } else if (a
->rn
== a
->rm
) {
1134 if (a
->pg
== a
->rn
) {
1135 return do_mov_p(s
, a
->rd
, a
->rn
);
1137 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->pg
);
1139 } else if (a
->pg
== a
->rn
|| a
->pg
== a
->rm
) {
1140 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
1142 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1146 static void gen_bic_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1148 tcg_gen_andc_i64(pd
, pn
, pm
);
1149 tcg_gen_and_i64(pd
, pd
, pg
);
1152 static void gen_bic_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1153 TCGv_vec pm
, TCGv_vec pg
)
1155 tcg_gen_andc_vec(vece
, pd
, pn
, pm
);
1156 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1159 static bool trans_BIC_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1161 static const GVecGen4 op
= {
1162 .fni8
= gen_bic_pg_i64
,
1163 .fniv
= gen_bic_pg_vec
,
1164 .fno
= gen_helper_sve_bic_pppp
,
1165 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1168 return do_pppp_flags(s
, a
, &op
);
1169 } else if (a
->pg
== a
->rn
) {
1170 return do_vector3_p(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
1172 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1176 static void gen_eor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1178 tcg_gen_xor_i64(pd
, pn
, pm
);
1179 tcg_gen_and_i64(pd
, pd
, pg
);
1182 static void gen_eor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1183 TCGv_vec pm
, TCGv_vec pg
)
1185 tcg_gen_xor_vec(vece
, pd
, pn
, pm
);
1186 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1189 static bool trans_EOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1191 static const GVecGen4 op
= {
1192 .fni8
= gen_eor_pg_i64
,
1193 .fniv
= gen_eor_pg_vec
,
1194 .fno
= gen_helper_sve_eor_pppp
,
1195 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1198 return do_pppp_flags(s
, a
, &op
);
1200 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1204 static void gen_sel_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1206 tcg_gen_and_i64(pn
, pn
, pg
);
1207 tcg_gen_andc_i64(pm
, pm
, pg
);
1208 tcg_gen_or_i64(pd
, pn
, pm
);
1211 static void gen_sel_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1212 TCGv_vec pm
, TCGv_vec pg
)
1214 tcg_gen_and_vec(vece
, pn
, pn
, pg
);
1215 tcg_gen_andc_vec(vece
, pm
, pm
, pg
);
1216 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1219 static bool trans_SEL_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1221 static const GVecGen4 op
= {
1222 .fni8
= gen_sel_pg_i64
,
1223 .fniv
= gen_sel_pg_vec
,
1224 .fno
= gen_helper_sve_sel_pppp
,
1225 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1230 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1234 static void gen_orr_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1236 tcg_gen_or_i64(pd
, pn
, pm
);
1237 tcg_gen_and_i64(pd
, pd
, pg
);
1240 static void gen_orr_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1241 TCGv_vec pm
, TCGv_vec pg
)
1243 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1244 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1247 static bool trans_ORR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1249 static const GVecGen4 op
= {
1250 .fni8
= gen_orr_pg_i64
,
1251 .fniv
= gen_orr_pg_vec
,
1252 .fno
= gen_helper_sve_orr_pppp
,
1253 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1256 return do_pppp_flags(s
, a
, &op
);
1257 } else if (a
->pg
== a
->rn
&& a
->rn
== a
->rm
) {
1258 return do_mov_p(s
, a
->rd
, a
->rn
);
1260 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1264 static void gen_orn_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1266 tcg_gen_orc_i64(pd
, pn
, pm
);
1267 tcg_gen_and_i64(pd
, pd
, pg
);
1270 static void gen_orn_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1271 TCGv_vec pm
, TCGv_vec pg
)
1273 tcg_gen_orc_vec(vece
, pd
, pn
, pm
);
1274 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1277 static bool trans_ORN_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1279 static const GVecGen4 op
= {
1280 .fni8
= gen_orn_pg_i64
,
1281 .fniv
= gen_orn_pg_vec
,
1282 .fno
= gen_helper_sve_orn_pppp
,
1283 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1286 return do_pppp_flags(s
, a
, &op
);
1288 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1292 static void gen_nor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1294 tcg_gen_or_i64(pd
, pn
, pm
);
1295 tcg_gen_andc_i64(pd
, pg
, pd
);
1298 static void gen_nor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1299 TCGv_vec pm
, TCGv_vec pg
)
1301 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1302 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1305 static bool trans_NOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1307 static const GVecGen4 op
= {
1308 .fni8
= gen_nor_pg_i64
,
1309 .fniv
= gen_nor_pg_vec
,
1310 .fno
= gen_helper_sve_nor_pppp
,
1311 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1314 return do_pppp_flags(s
, a
, &op
);
1316 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1320 static void gen_nand_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1322 tcg_gen_and_i64(pd
, pn
, pm
);
1323 tcg_gen_andc_i64(pd
, pg
, pd
);
1326 static void gen_nand_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1327 TCGv_vec pm
, TCGv_vec pg
)
1329 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1330 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1333 static bool trans_NAND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1335 static const GVecGen4 op
= {
1336 .fni8
= gen_nand_pg_i64
,
1337 .fniv
= gen_nand_pg_vec
,
1338 .fno
= gen_helper_sve_nand_pppp
,
1339 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1342 return do_pppp_flags(s
, a
, &op
);
1344 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1349 *** SVE Predicate Misc Group
1352 static bool trans_PTEST(DisasContext
*s
, arg_PTEST
*a
, uint32_t insn
)
1354 if (sve_access_check(s
)) {
1355 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1356 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1357 int words
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1360 TCGv_i64 pn
= tcg_temp_new_i64();
1361 TCGv_i64 pg
= tcg_temp_new_i64();
1363 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1364 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1365 do_predtest1(pn
, pg
);
1367 tcg_temp_free_i64(pn
);
1368 tcg_temp_free_i64(pg
);
1370 do_predtest(s
, nofs
, gofs
, words
);
/* See the ARM pseudocode DecodePredCount.  Returns the number of active
 * elements implied by PATTERN for a vector of FULLSZ bytes and element
 * size ESZ (log2 bytes); 0 when the pattern's bound cannot be met.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed-count patterns activate exactly BOUND elements, or none. */
    return elements >= bound ? bound : 0;
}
1414 /* This handles all of the predicate initialization instructions,
1415 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1416 * so that decode_pred_count returns 0. For SETFFR, we will have
1417 * set RD == 16 == FFR.
1419 static bool do_predset(DisasContext
*s
, int esz
, int rd
, int pat
, bool setflag
)
1421 if (!sve_access_check(s
)) {
1425 unsigned fullsz
= vec_full_reg_size(s
);
1426 unsigned ofs
= pred_full_reg_offset(s
, rd
);
1427 unsigned numelem
, setsz
, i
;
1428 uint64_t word
, lastword
;
1431 numelem
= decode_pred_count(fullsz
, pat
, esz
);
1433 /* Determine what we must store into each bit, and how many. */
1435 lastword
= word
= 0;
1438 setsz
= numelem
<< esz
;
1439 lastword
= word
= pred_esz_masks
[esz
];
1441 lastword
&= ~(-1ull << (setsz
% 64));
1445 t
= tcg_temp_new_i64();
1447 tcg_gen_movi_i64(t
, lastword
);
1448 tcg_gen_st_i64(t
, cpu_env
, ofs
);
1452 if (word
== lastword
) {
1453 unsigned maxsz
= size_for_gvec(fullsz
/ 8);
1454 unsigned oprsz
= size_for_gvec(setsz
/ 8);
1456 if (oprsz
* 8 == setsz
) {
1457 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1460 if (oprsz
* 8 == setsz
+ 8) {
1461 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1462 tcg_gen_movi_i64(t
, 0);
1463 tcg_gen_st_i64(t
, cpu_env
, ofs
+ oprsz
- 8);
1471 tcg_gen_movi_i64(t
, word
);
1472 for (i
= 0; i
< setsz
; i
+= 8) {
1473 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1475 if (lastword
!= word
) {
1476 tcg_gen_movi_i64(t
, lastword
);
1477 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1481 tcg_gen_movi_i64(t
, 0);
1482 for (; i
< fullsz
; i
+= 8) {
1483 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1488 tcg_temp_free_i64(t
);
1492 tcg_gen_movi_i32(cpu_NF
, -(word
!= 0));
1493 tcg_gen_movi_i32(cpu_CF
, word
== 0);
1494 tcg_gen_movi_i32(cpu_VF
, 0);
1495 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
1500 static bool trans_PTRUE(DisasContext
*s
, arg_PTRUE
*a
, uint32_t insn
)
1502 return do_predset(s
, a
->esz
, a
->rd
, a
->pat
, a
->s
);
1505 static bool trans_SETFFR(DisasContext
*s
, arg_SETFFR
*a
, uint32_t insn
)
1507 /* Note pat == 31 is #all, to set all elements. */
1508 return do_predset(s
, 0, FFR_PRED_NUM
, 31, false);
1511 static bool trans_PFALSE(DisasContext
*s
, arg_PFALSE
*a
, uint32_t insn
)
1513 /* Note pat == 32 is #unimp, to set no elements. */
1514 return do_predset(s
, 0, a
->rd
, 32, false);
1517 static bool trans_RDFFR_p(DisasContext
*s
, arg_RDFFR_p
*a
, uint32_t insn
)
1519 /* The path through do_pppp_flags is complicated enough to want to avoid
1520 * duplication. Frob the arguments into the form of a predicated AND.
1522 arg_rprr_s alt_a
= {
1523 .rd
= a
->rd
, .pg
= a
->pg
, .s
= a
->s
,
1524 .rn
= FFR_PRED_NUM
, .rm
= FFR_PRED_NUM
,
1526 return trans_AND_pppp(s
, &alt_a
, insn
);
1529 static bool trans_RDFFR(DisasContext
*s
, arg_RDFFR
*a
, uint32_t insn
)
1531 return do_mov_p(s
, a
->rd
, FFR_PRED_NUM
);
1534 static bool trans_WRFFR(DisasContext
*s
, arg_WRFFR
*a
, uint32_t insn
)
1536 return do_mov_p(s
, FFR_PRED_NUM
, a
->rn
);
1539 static bool do_pfirst_pnext(DisasContext
*s
, arg_rr_esz
*a
,
1540 void (*gen_fn
)(TCGv_i32
, TCGv_ptr
,
1541 TCGv_ptr
, TCGv_i32
))
1543 if (!sve_access_check(s
)) {
1547 TCGv_ptr t_pd
= tcg_temp_new_ptr();
1548 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1552 desc
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1553 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
1555 tcg_gen_addi_ptr(t_pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
1556 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
1557 t
= tcg_const_i32(desc
);
1559 gen_fn(t
, t_pd
, t_pg
, t
);
1560 tcg_temp_free_ptr(t_pd
);
1561 tcg_temp_free_ptr(t_pg
);
1564 tcg_temp_free_i32(t
);
1568 static bool trans_PFIRST(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1570 return do_pfirst_pnext(s
, a
, gen_helper_sve_pfirst
);
1573 static bool trans_PNEXT(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1575 return do_pfirst_pnext(s
, a
, gen_helper_sve_pnext
);
1579 *** SVE Element Count Group
1582 /* Perform an inline saturating addition of a 32-bit value within
1583 * a 64-bit register. The second operand is known to be positive,
1584 * which halves the comparisions we must perform to bound the result.
1586 static void do_sat_addsub_32(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1592 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1594 tcg_gen_ext32u_i64(reg
, reg
);
1596 tcg_gen_ext32s_i64(reg
, reg
);
1599 tcg_gen_sub_i64(reg
, reg
, val
);
1600 ibound
= (u
? 0 : INT32_MIN
);
1603 tcg_gen_add_i64(reg
, reg
, val
);
1604 ibound
= (u
? UINT32_MAX
: INT32_MAX
);
1607 bound
= tcg_const_i64(ibound
);
1608 tcg_gen_movcond_i64(cond
, reg
, reg
, bound
, bound
, reg
);
1609 tcg_temp_free_i64(bound
);
1612 /* Similarly with 64-bit values. */
1613 static void do_sat_addsub_64(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1615 TCGv_i64 t0
= tcg_temp_new_i64();
1616 TCGv_i64 t1
= tcg_temp_new_i64();
1621 tcg_gen_sub_i64(t0
, reg
, val
);
1622 tcg_gen_movi_i64(t1
, 0);
1623 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, reg
, val
, t1
, t0
);
1625 tcg_gen_add_i64(t0
, reg
, val
);
1626 tcg_gen_movi_i64(t1
, -1);
1627 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, t0
, reg
, t1
, t0
);
1631 /* Detect signed overflow for subtraction. */
1632 tcg_gen_xor_i64(t0
, reg
, val
);
1633 tcg_gen_sub_i64(t1
, reg
, val
);
1634 tcg_gen_xor_i64(reg
, reg
, t0
);
1635 tcg_gen_and_i64(t0
, t0
, reg
);
1637 /* Bound the result. */
1638 tcg_gen_movi_i64(reg
, INT64_MIN
);
1639 t2
= tcg_const_i64(0);
1640 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, reg
, t1
);
1642 /* Detect signed overflow for addition. */
1643 tcg_gen_xor_i64(t0
, reg
, val
);
1644 tcg_gen_add_i64(reg
, reg
, val
);
1645 tcg_gen_xor_i64(t1
, reg
, val
);
1646 tcg_gen_andc_i64(t0
, t1
, t0
);
1648 /* Bound the result. */
1649 tcg_gen_movi_i64(t1
, INT64_MAX
);
1650 t2
= tcg_const_i64(0);
1651 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, t1
, reg
);
1653 tcg_temp_free_i64(t2
);
1655 tcg_temp_free_i64(t0
);
1656 tcg_temp_free_i64(t1
);
1659 /* Similarly with a vector and a scalar operand. */
1660 static void do_sat_addsub_vec(DisasContext
*s
, int esz
, int rd
, int rn
,
1661 TCGv_i64 val
, bool u
, bool d
)
1663 unsigned vsz
= vec_full_reg_size(s
);
1664 TCGv_ptr dptr
, nptr
;
1668 dptr
= tcg_temp_new_ptr();
1669 nptr
= tcg_temp_new_ptr();
1670 tcg_gen_addi_ptr(dptr
, cpu_env
, vec_full_reg_offset(s
, rd
));
1671 tcg_gen_addi_ptr(nptr
, cpu_env
, vec_full_reg_offset(s
, rn
));
1672 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1676 t32
= tcg_temp_new_i32();
1677 tcg_gen_extrl_i64_i32(t32
, val
);
1679 tcg_gen_neg_i32(t32
, t32
);
1682 gen_helper_sve_uqaddi_b(dptr
, nptr
, t32
, desc
);
1684 gen_helper_sve_sqaddi_b(dptr
, nptr
, t32
, desc
);
1686 tcg_temp_free_i32(t32
);
1690 t32
= tcg_temp_new_i32();
1691 tcg_gen_extrl_i64_i32(t32
, val
);
1693 tcg_gen_neg_i32(t32
, t32
);
1696 gen_helper_sve_uqaddi_h(dptr
, nptr
, t32
, desc
);
1698 gen_helper_sve_sqaddi_h(dptr
, nptr
, t32
, desc
);
1700 tcg_temp_free_i32(t32
);
1704 t64
= tcg_temp_new_i64();
1706 tcg_gen_neg_i64(t64
, val
);
1708 tcg_gen_mov_i64(t64
, val
);
1711 gen_helper_sve_uqaddi_s(dptr
, nptr
, t64
, desc
);
1713 gen_helper_sve_sqaddi_s(dptr
, nptr
, t64
, desc
);
1715 tcg_temp_free_i64(t64
);
1721 gen_helper_sve_uqsubi_d(dptr
, nptr
, val
, desc
);
1723 gen_helper_sve_uqaddi_d(dptr
, nptr
, val
, desc
);
1726 t64
= tcg_temp_new_i64();
1727 tcg_gen_neg_i64(t64
, val
);
1728 gen_helper_sve_sqaddi_d(dptr
, nptr
, t64
, desc
);
1729 tcg_temp_free_i64(t64
);
1731 gen_helper_sve_sqaddi_d(dptr
, nptr
, val
, desc
);
1736 g_assert_not_reached();
1739 tcg_temp_free_ptr(dptr
);
1740 tcg_temp_free_ptr(nptr
);
1741 tcg_temp_free_i32(desc
);
1744 static bool trans_CNT_r(DisasContext
*s
, arg_CNT_r
*a
, uint32_t insn
)
1746 if (sve_access_check(s
)) {
1747 unsigned fullsz
= vec_full_reg_size(s
);
1748 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1749 tcg_gen_movi_i64(cpu_reg(s
, a
->rd
), numelem
* a
->imm
);
1754 static bool trans_INCDEC_r(DisasContext
*s
, arg_incdec_cnt
*a
, uint32_t insn
)
1756 if (sve_access_check(s
)) {
1757 unsigned fullsz
= vec_full_reg_size(s
);
1758 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1759 int inc
= numelem
* a
->imm
* (a
->d
? -1 : 1);
1760 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1762 tcg_gen_addi_i64(reg
, reg
, inc
);
1767 static bool trans_SINCDEC_r_32(DisasContext
*s
, arg_incdec_cnt
*a
,
1770 if (!sve_access_check(s
)) {
1774 unsigned fullsz
= vec_full_reg_size(s
);
1775 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1776 int inc
= numelem
* a
->imm
;
1777 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1779 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1782 tcg_gen_ext32u_i64(reg
, reg
);
1784 tcg_gen_ext32s_i64(reg
, reg
);
1787 TCGv_i64 t
= tcg_const_i64(inc
);
1788 do_sat_addsub_32(reg
, t
, a
->u
, a
->d
);
1789 tcg_temp_free_i64(t
);
1794 static bool trans_SINCDEC_r_64(DisasContext
*s
, arg_incdec_cnt
*a
,
1797 if (!sve_access_check(s
)) {
1801 unsigned fullsz
= vec_full_reg_size(s
);
1802 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1803 int inc
= numelem
* a
->imm
;
1804 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1807 TCGv_i64 t
= tcg_const_i64(inc
);
1808 do_sat_addsub_64(reg
, t
, a
->u
, a
->d
);
1809 tcg_temp_free_i64(t
);
1814 static bool trans_INCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
, uint32_t insn
)
1820 unsigned fullsz
= vec_full_reg_size(s
);
1821 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1822 int inc
= numelem
* a
->imm
;
1825 if (sve_access_check(s
)) {
1826 TCGv_i64 t
= tcg_const_i64(a
->d
? -inc
: inc
);
1827 tcg_gen_gvec_adds(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
1828 vec_full_reg_offset(s
, a
->rn
),
1830 tcg_temp_free_i64(t
);
1833 do_mov_z(s
, a
->rd
, a
->rn
);
1838 static bool trans_SINCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
,
1845 unsigned fullsz
= vec_full_reg_size(s
);
1846 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1847 int inc
= numelem
* a
->imm
;
1850 if (sve_access_check(s
)) {
1851 TCGv_i64 t
= tcg_const_i64(inc
);
1852 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, t
, a
->u
, a
->d
);
1853 tcg_temp_free_i64(t
);
1856 do_mov_z(s
, a
->rd
, a
->rn
);
1862 *** SVE Bitwise Immediate Group
1865 static bool do_zz_dbm(DisasContext
*s
, arg_rr_dbm
*a
, GVecGen2iFn
*gvec_fn
)
1868 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1869 extract32(a
->dbm
, 0, 6),
1870 extract32(a
->dbm
, 6, 6))) {
1873 if (sve_access_check(s
)) {
1874 unsigned vsz
= vec_full_reg_size(s
);
1875 gvec_fn(MO_64
, vec_full_reg_offset(s
, a
->rd
),
1876 vec_full_reg_offset(s
, a
->rn
), imm
, vsz
, vsz
);
1881 static bool trans_AND_zzi(DisasContext
*s
, arg_rr_dbm
*a
, uint32_t insn
)
1883 return do_zz_dbm(s
, a
, tcg_gen_gvec_andi
);
1886 static bool trans_ORR_zzi(DisasContext
*s
, arg_rr_dbm
*a
, uint32_t insn
)
1888 return do_zz_dbm(s
, a
, tcg_gen_gvec_ori
);
1891 static bool trans_EOR_zzi(DisasContext
*s
, arg_rr_dbm
*a
, uint32_t insn
)
1893 return do_zz_dbm(s
, a
, tcg_gen_gvec_xori
);
1896 static bool trans_DUPM(DisasContext
*s
, arg_DUPM
*a
, uint32_t insn
)
1899 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1900 extract32(a
->dbm
, 0, 6),
1901 extract32(a
->dbm
, 6, 6))) {
1904 if (sve_access_check(s
)) {
1905 do_dupi_z(s
, a
->rd
, imm
);
1911 *** SVE Integer Wide Immediate - Predicated Group
1914 /* Implement all merging copies. This is used for CPY (immediate),
1915 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1917 static void do_cpy_m(DisasContext
*s
, int esz
, int rd
, int rn
, int pg
,
1920 typedef void gen_cpy(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
1921 static gen_cpy
* const fns
[4] = {
1922 gen_helper_sve_cpy_m_b
, gen_helper_sve_cpy_m_h
,
1923 gen_helper_sve_cpy_m_s
, gen_helper_sve_cpy_m_d
,
1925 unsigned vsz
= vec_full_reg_size(s
);
1926 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1927 TCGv_ptr t_zd
= tcg_temp_new_ptr();
1928 TCGv_ptr t_zn
= tcg_temp_new_ptr();
1929 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1931 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
1932 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, rn
));
1933 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
1935 fns
[esz
](t_zd
, t_zn
, t_pg
, val
, desc
);
1937 tcg_temp_free_ptr(t_zd
);
1938 tcg_temp_free_ptr(t_zn
);
1939 tcg_temp_free_ptr(t_pg
);
1940 tcg_temp_free_i32(desc
);
1943 static bool trans_FCPY(DisasContext
*s
, arg_FCPY
*a
, uint32_t insn
)
1948 if (sve_access_check(s
)) {
1949 /* Decode the VFP immediate. */
1950 uint64_t imm
= vfp_expand_imm(a
->esz
, a
->imm
);
1951 TCGv_i64 t_imm
= tcg_const_i64(imm
);
1952 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1953 tcg_temp_free_i64(t_imm
);
1958 static bool trans_CPY_m_i(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
1960 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
1963 if (sve_access_check(s
)) {
1964 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1965 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1966 tcg_temp_free_i64(t_imm
);
1971 static bool trans_CPY_z_i(DisasContext
*s
, arg_CPY_z_i
*a
, uint32_t insn
)
1973 static gen_helper_gvec_2i
* const fns
[4] = {
1974 gen_helper_sve_cpy_z_b
, gen_helper_sve_cpy_z_h
,
1975 gen_helper_sve_cpy_z_s
, gen_helper_sve_cpy_z_d
,
1978 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
1981 if (sve_access_check(s
)) {
1982 unsigned vsz
= vec_full_reg_size(s
);
1983 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1984 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
1985 pred_full_reg_offset(s
, a
->pg
),
1986 t_imm
, vsz
, vsz
, 0, fns
[a
->esz
]);
1987 tcg_temp_free_i64(t_imm
);
1993 *** SVE Permute Extract Group
1996 static bool trans_EXT(DisasContext
*s
, arg_EXT
*a
, uint32_t insn
)
1998 if (!sve_access_check(s
)) {
2002 unsigned vsz
= vec_full_reg_size(s
);
2003 unsigned n_ofs
= a
->imm
>= vsz
? 0 : a
->imm
;
2004 unsigned n_siz
= vsz
- n_ofs
;
2005 unsigned d
= vec_full_reg_offset(s
, a
->rd
);
2006 unsigned n
= vec_full_reg_offset(s
, a
->rn
);
2007 unsigned m
= vec_full_reg_offset(s
, a
->rm
);
2009 /* Use host vector move insns if we have appropriate sizes
2010 * and no unfortunate overlap.
2013 && n_ofs
== size_for_gvec(n_ofs
)
2014 && n_siz
== size_for_gvec(n_siz
)
2015 && (d
!= n
|| n_siz
<= n_ofs
)) {
2016 tcg_gen_gvec_mov(0, d
, n
+ n_ofs
, n_siz
, n_siz
);
2018 tcg_gen_gvec_mov(0, d
+ n_siz
, m
, n_ofs
, n_ofs
);
2021 tcg_gen_gvec_3_ool(d
, n
, m
, vsz
, vsz
, n_ofs
, gen_helper_sve_ext
);
2027 *** SVE Permute - Unpredicated Group
2030 static bool trans_DUP_s(DisasContext
*s
, arg_DUP_s
*a
, uint32_t insn
)
2032 if (sve_access_check(s
)) {
2033 unsigned vsz
= vec_full_reg_size(s
);
2034 tcg_gen_gvec_dup_i64(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
2035 vsz
, vsz
, cpu_reg_sp(s
, a
->rn
));
2040 static bool trans_DUP_x(DisasContext
*s
, arg_DUP_x
*a
, uint32_t insn
)
2042 if ((a
->imm
& 0x1f) == 0) {
2045 if (sve_access_check(s
)) {
2046 unsigned vsz
= vec_full_reg_size(s
);
2047 unsigned dofs
= vec_full_reg_offset(s
, a
->rd
);
2048 unsigned esz
, index
;
2050 esz
= ctz32(a
->imm
);
2051 index
= a
->imm
>> (esz
+ 1);
2053 if ((index
<< esz
) < vsz
) {
2054 unsigned nofs
= vec_reg_offset(s
, a
->rn
, index
, esz
);
2055 tcg_gen_gvec_dup_mem(esz
, dofs
, nofs
, vsz
, vsz
);
2057 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, 0);
2063 static void do_insr_i64(DisasContext
*s
, arg_rrr_esz
*a
, TCGv_i64 val
)
2065 typedef void gen_insr(TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
2066 static gen_insr
* const fns
[4] = {
2067 gen_helper_sve_insr_b
, gen_helper_sve_insr_h
,
2068 gen_helper_sve_insr_s
, gen_helper_sve_insr_d
,
2070 unsigned vsz
= vec_full_reg_size(s
);
2071 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2072 TCGv_ptr t_zd
= tcg_temp_new_ptr();
2073 TCGv_ptr t_zn
= tcg_temp_new_ptr();
2075 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, a
->rd
));
2076 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2078 fns
[a
->esz
](t_zd
, t_zn
, val
, desc
);
2080 tcg_temp_free_ptr(t_zd
);
2081 tcg_temp_free_ptr(t_zn
);
2082 tcg_temp_free_i32(desc
);
2085 static bool trans_INSR_f(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2087 if (sve_access_check(s
)) {
2088 TCGv_i64 t
= tcg_temp_new_i64();
2089 tcg_gen_ld_i64(t
, cpu_env
, vec_reg_offset(s
, a
->rm
, 0, MO_64
));
2090 do_insr_i64(s
, a
, t
);
2091 tcg_temp_free_i64(t
);
2096 static bool trans_INSR_r(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2098 if (sve_access_check(s
)) {
2099 do_insr_i64(s
, a
, cpu_reg(s
, a
->rm
));
2104 static bool trans_REV_v(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
2106 static gen_helper_gvec_2
* const fns
[4] = {
2107 gen_helper_sve_rev_b
, gen_helper_sve_rev_h
,
2108 gen_helper_sve_rev_s
, gen_helper_sve_rev_d
2111 if (sve_access_check(s
)) {
2112 unsigned vsz
= vec_full_reg_size(s
);
2113 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
2114 vec_full_reg_offset(s
, a
->rn
),
2115 vsz
, vsz
, 0, fns
[a
->esz
]);
2120 static bool trans_TBL(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2122 static gen_helper_gvec_3
* const fns
[4] = {
2123 gen_helper_sve_tbl_b
, gen_helper_sve_tbl_h
,
2124 gen_helper_sve_tbl_s
, gen_helper_sve_tbl_d
2127 if (sve_access_check(s
)) {
2128 unsigned vsz
= vec_full_reg_size(s
);
2129 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2130 vec_full_reg_offset(s
, a
->rn
),
2131 vec_full_reg_offset(s
, a
->rm
),
2132 vsz
, vsz
, 0, fns
[a
->esz
]);
2137 static bool trans_UNPK(DisasContext
*s
, arg_UNPK
*a
, uint32_t insn
)
2139 static gen_helper_gvec_2
* const fns
[4][2] = {
2141 { gen_helper_sve_sunpk_h
, gen_helper_sve_uunpk_h
},
2142 { gen_helper_sve_sunpk_s
, gen_helper_sve_uunpk_s
},
2143 { gen_helper_sve_sunpk_d
, gen_helper_sve_uunpk_d
},
2149 if (sve_access_check(s
)) {
2150 unsigned vsz
= vec_full_reg_size(s
);
2151 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
2152 vec_full_reg_offset(s
, a
->rn
)
2153 + (a
->h
? vsz
/ 2 : 0),
2154 vsz
, vsz
, 0, fns
[a
->esz
][a
->u
]);
2160 *** SVE Permute - Predicates Group
2163 static bool do_perm_pred3(DisasContext
*s
, arg_rrr_esz
*a
, bool high_odd
,
2164 gen_helper_gvec_3
*fn
)
2166 if (!sve_access_check(s
)) {
2170 unsigned vsz
= pred_full_reg_size(s
);
2172 /* Predicate sizes may be smaller and cannot use simd_desc.
2173 We cannot round up, as we do elsewhere, because we need
2174 the exact size for ZIP2 and REV. We retain the style for
2175 the other helpers for consistency. */
2176 TCGv_ptr t_d
= tcg_temp_new_ptr();
2177 TCGv_ptr t_n
= tcg_temp_new_ptr();
2178 TCGv_ptr t_m
= tcg_temp_new_ptr();
2183 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
2184 desc
= deposit32(desc
, SIMD_DATA_SHIFT
+ 2, 2, high_odd
);
2186 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2187 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2188 tcg_gen_addi_ptr(t_m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2189 t_desc
= tcg_const_i32(desc
);
2191 fn(t_d
, t_n
, t_m
, t_desc
);
2193 tcg_temp_free_ptr(t_d
);
2194 tcg_temp_free_ptr(t_n
);
2195 tcg_temp_free_ptr(t_m
);
2196 tcg_temp_free_i32(t_desc
);
2200 static bool do_perm_pred2(DisasContext
*s
, arg_rr_esz
*a
, bool high_odd
,
2201 gen_helper_gvec_2
*fn
)
2203 if (!sve_access_check(s
)) {
2207 unsigned vsz
= pred_full_reg_size(s
);
2208 TCGv_ptr t_d
= tcg_temp_new_ptr();
2209 TCGv_ptr t_n
= tcg_temp_new_ptr();
2213 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2214 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2216 /* Predicate sizes may be smaller and cannot use simd_desc.
2217 We cannot round up, as we do elsewhere, because we need
2218 the exact size for ZIP2 and REV. We retain the style for
2219 the other helpers for consistency. */
2222 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
2223 desc
= deposit32(desc
, SIMD_DATA_SHIFT
+ 2, 2, high_odd
);
2224 t_desc
= tcg_const_i32(desc
);
2226 fn(t_d
, t_n
, t_desc
);
2228 tcg_temp_free_i32(t_desc
);
2229 tcg_temp_free_ptr(t_d
);
2230 tcg_temp_free_ptr(t_n
);
2234 static bool trans_ZIP1_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2236 return do_perm_pred3(s
, a
, 0, gen_helper_sve_zip_p
);
2239 static bool trans_ZIP2_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2241 return do_perm_pred3(s
, a
, 1, gen_helper_sve_zip_p
);
2244 static bool trans_UZP1_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2246 return do_perm_pred3(s
, a
, 0, gen_helper_sve_uzp_p
);
2249 static bool trans_UZP2_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2251 return do_perm_pred3(s
, a
, 1, gen_helper_sve_uzp_p
);
2254 static bool trans_TRN1_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2256 return do_perm_pred3(s
, a
, 0, gen_helper_sve_trn_p
);
2259 static bool trans_TRN2_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2261 return do_perm_pred3(s
, a
, 1, gen_helper_sve_trn_p
);
2264 static bool trans_REV_p(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
2266 return do_perm_pred2(s
, a
, 0, gen_helper_sve_rev_p
);
2269 static bool trans_PUNPKLO(DisasContext
*s
, arg_PUNPKLO
*a
, uint32_t insn
)
2271 return do_perm_pred2(s
, a
, 0, gen_helper_sve_punpk_p
);
2274 static bool trans_PUNPKHI(DisasContext
*s
, arg_PUNPKHI
*a
, uint32_t insn
)
2276 return do_perm_pred2(s
, a
, 1, gen_helper_sve_punpk_p
);
2280 *** SVE Permute - Interleaving Group
2283 static bool do_zip(DisasContext
*s
, arg_rrr_esz
*a
, bool high
)
2285 static gen_helper_gvec_3
* const fns
[4] = {
2286 gen_helper_sve_zip_b
, gen_helper_sve_zip_h
,
2287 gen_helper_sve_zip_s
, gen_helper_sve_zip_d
,
2290 if (sve_access_check(s
)) {
2291 unsigned vsz
= vec_full_reg_size(s
);
2292 unsigned high_ofs
= high
? vsz
/ 2 : 0;
2293 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2294 vec_full_reg_offset(s
, a
->rn
) + high_ofs
,
2295 vec_full_reg_offset(s
, a
->rm
) + high_ofs
,
2296 vsz
, vsz
, 0, fns
[a
->esz
]);
2301 static bool do_zzz_data_ool(DisasContext
*s
, arg_rrr_esz
*a
, int data
,
2302 gen_helper_gvec_3
*fn
)
2304 if (sve_access_check(s
)) {
2305 unsigned vsz
= vec_full_reg_size(s
);
2306 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2307 vec_full_reg_offset(s
, a
->rn
),
2308 vec_full_reg_offset(s
, a
->rm
),
2309 vsz
, vsz
, data
, fn
);
2314 static bool trans_ZIP1_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2316 return do_zip(s
, a
, false);
2319 static bool trans_ZIP2_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2321 return do_zip(s
, a
, true);
2324 static gen_helper_gvec_3
* const uzp_fns
[4] = {
2325 gen_helper_sve_uzp_b
, gen_helper_sve_uzp_h
,
2326 gen_helper_sve_uzp_s
, gen_helper_sve_uzp_d
,
2329 static bool trans_UZP1_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2331 return do_zzz_data_ool(s
, a
, 0, uzp_fns
[a
->esz
]);
2334 static bool trans_UZP2_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2336 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, uzp_fns
[a
->esz
]);
2339 static gen_helper_gvec_3
* const trn_fns
[4] = {
2340 gen_helper_sve_trn_b
, gen_helper_sve_trn_h
,
2341 gen_helper_sve_trn_s
, gen_helper_sve_trn_d
,
2344 static bool trans_TRN1_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2346 return do_zzz_data_ool(s
, a
, 0, trn_fns
[a
->esz
]);
2349 static bool trans_TRN2_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2351 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, trn_fns
[a
->esz
]);
2355 *** SVE Permute Vector - Predicated Group
2358 static bool trans_COMPACT(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2360 static gen_helper_gvec_3
* const fns
[4] = {
2361 NULL
, NULL
, gen_helper_sve_compact_s
, gen_helper_sve_compact_d
2363 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2366 /* Call the helper that computes the ARM LastActiveElement pseudocode
2367 * function, scaled by the element size. This includes the not found
2368 * indication; e.g. not found for esz=3 is -8.
2370 static void find_last_active(DisasContext
*s
, TCGv_i32 ret
, int esz
, int pg
)
2372 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2373 * round up, as we do elsewhere, because we need the exact size.
2375 TCGv_ptr t_p
= tcg_temp_new_ptr();
2377 unsigned vsz
= pred_full_reg_size(s
);
2381 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
2383 tcg_gen_addi_ptr(t_p
, cpu_env
, pred_full_reg_offset(s
, pg
));
2384 t_desc
= tcg_const_i32(desc
);
2386 gen_helper_sve_last_active_element(ret
, t_p
, t_desc
);
2388 tcg_temp_free_i32(t_desc
);
2389 tcg_temp_free_ptr(t_p
);
2392 /* Increment LAST to the offset of the next element in the vector,
2393 * wrapping around to 0.
2395 static void incr_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2397 unsigned vsz
= vec_full_reg_size(s
);
2399 tcg_gen_addi_i32(last
, last
, 1 << esz
);
2400 if (is_power_of_2(vsz
)) {
2401 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2403 TCGv_i32 max
= tcg_const_i32(vsz
);
2404 TCGv_i32 zero
= tcg_const_i32(0);
2405 tcg_gen_movcond_i32(TCG_COND_GEU
, last
, last
, max
, zero
, last
);
2406 tcg_temp_free_i32(max
);
2407 tcg_temp_free_i32(zero
);
2411 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2412 static void wrap_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2414 unsigned vsz
= vec_full_reg_size(s
);
2416 if (is_power_of_2(vsz
)) {
2417 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2419 TCGv_i32 max
= tcg_const_i32(vsz
- (1 << esz
));
2420 TCGv_i32 zero
= tcg_const_i32(0);
2421 tcg_gen_movcond_i32(TCG_COND_LT
, last
, last
, zero
, max
, last
);
2422 tcg_temp_free_i32(max
);
2423 tcg_temp_free_i32(zero
);
2427 /* Load an unsigned element of ESZ from BASE+OFS. */
2428 static TCGv_i64
load_esz(TCGv_ptr base
, int ofs
, int esz
)
2430 TCGv_i64 r
= tcg_temp_new_i64();
2434 tcg_gen_ld8u_i64(r
, base
, ofs
);
2437 tcg_gen_ld16u_i64(r
, base
, ofs
);
2440 tcg_gen_ld32u_i64(r
, base
, ofs
);
2443 tcg_gen_ld_i64(r
, base
, ofs
);
2446 g_assert_not_reached();
2451 /* Load an unsigned element of ESZ from RM[LAST]. */
2452 static TCGv_i64
load_last_active(DisasContext
*s
, TCGv_i32 last
,
2455 TCGv_ptr p
= tcg_temp_new_ptr();
2458 /* Convert offset into vector into offset into ENV.
2459 * The final adjustment for the vector register base
2460 * is added via constant offset to the load.
2462 #ifdef HOST_WORDS_BIGENDIAN
2463 /* Adjust for element ordering. See vec_reg_offset. */
2465 tcg_gen_xori_i32(last
, last
, 8 - (1 << esz
));
2468 tcg_gen_ext_i32_ptr(p
, last
);
2469 tcg_gen_add_ptr(p
, p
, cpu_env
);
2471 r
= load_esz(p
, vec_full_reg_offset(s
, rm
), esz
);
2472 tcg_temp_free_ptr(p
);
2477 /* Compute CLAST for a Zreg. */
2478 static bool do_clast_vector(DisasContext
*s
, arg_rprr_esz
*a
, bool before
)
2483 unsigned vsz
, esz
= a
->esz
;
2485 if (!sve_access_check(s
)) {
2489 last
= tcg_temp_local_new_i32();
2490 over
= gen_new_label();
2492 find_last_active(s
, last
, esz
, a
->pg
);
2494 /* There is of course no movcond for a 2048-bit vector,
2495 * so we must branch over the actual store.
2497 tcg_gen_brcondi_i32(TCG_COND_LT
, last
, 0, over
);
2500 incr_last_active(s
, last
, esz
);
2503 ele
= load_last_active(s
, last
, a
->rm
, esz
);
2504 tcg_temp_free_i32(last
);
2506 vsz
= vec_full_reg_size(s
);
2507 tcg_gen_gvec_dup_i64(esz
, vec_full_reg_offset(s
, a
->rd
), vsz
, vsz
, ele
);
2508 tcg_temp_free_i64(ele
);
2510 /* If this insn used MOVPRFX, we may need a second move. */
2511 if (a
->rd
!= a
->rn
) {
2512 TCGLabel
*done
= gen_new_label();
2515 gen_set_label(over
);
2516 do_mov_z(s
, a
->rd
, a
->rn
);
2518 gen_set_label(done
);
2520 gen_set_label(over
);
2525 static bool trans_CLASTA_z(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
2527 return do_clast_vector(s
, a
, false);
2530 static bool trans_CLASTB_z(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
2532 return do_clast_vector(s
, a
, true);
2535 /* Compute CLAST for a scalar. */
2536 static void do_clast_scalar(DisasContext
*s
, int esz
, int pg
, int rm
,
2537 bool before
, TCGv_i64 reg_val
)
2539 TCGv_i32 last
= tcg_temp_new_i32();
2540 TCGv_i64 ele
, cmp
, zero
;
2542 find_last_active(s
, last
, esz
, pg
);
2544 /* Extend the original value of last prior to incrementing. */
2545 cmp
= tcg_temp_new_i64();
2546 tcg_gen_ext_i32_i64(cmp
, last
);
2549 incr_last_active(s
, last
, esz
);
2552 /* The conceit here is that while last < 0 indicates not found, after
2553 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2554 * from which we can load garbage. We then discard the garbage with
2555 * a conditional move.
2557 ele
= load_last_active(s
, last
, rm
, esz
);
2558 tcg_temp_free_i32(last
);
2560 zero
= tcg_const_i64(0);
2561 tcg_gen_movcond_i64(TCG_COND_GE
, reg_val
, cmp
, zero
, ele
, reg_val
);
2563 tcg_temp_free_i64(zero
);
2564 tcg_temp_free_i64(cmp
);
2565 tcg_temp_free_i64(ele
);
2568 /* Compute CLAST for a Vreg. */
2569 static bool do_clast_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2571 if (sve_access_check(s
)) {
2573 int ofs
= vec_reg_offset(s
, a
->rd
, 0, esz
);
2574 TCGv_i64 reg
= load_esz(cpu_env
, ofs
, esz
);
2576 do_clast_scalar(s
, esz
, a
->pg
, a
->rn
, before
, reg
);
2577 write_fp_dreg(s
, a
->rd
, reg
);
2578 tcg_temp_free_i64(reg
);
2583 static bool trans_CLASTA_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2585 return do_clast_fp(s
, a
, false);
2588 static bool trans_CLASTB_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2590 return do_clast_fp(s
, a
, true);
2593 /* Compute CLAST for a Xreg. */
2594 static bool do_clast_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2598 if (!sve_access_check(s
)) {
2602 reg
= cpu_reg(s
, a
->rd
);
2605 tcg_gen_ext8u_i64(reg
, reg
);
2608 tcg_gen_ext16u_i64(reg
, reg
);
2611 tcg_gen_ext32u_i64(reg
, reg
);
2616 g_assert_not_reached();
2619 do_clast_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
, reg
);
2623 static bool trans_CLASTA_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2625 return do_clast_general(s
, a
, false);
2628 static bool trans_CLASTB_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2630 return do_clast_general(s
, a
, true);
2633 /* Compute LAST for a scalar. */
2634 static TCGv_i64
do_last_scalar(DisasContext
*s
, int esz
,
2635 int pg
, int rm
, bool before
)
2637 TCGv_i32 last
= tcg_temp_new_i32();
2640 find_last_active(s
, last
, esz
, pg
);
2642 wrap_last_active(s
, last
, esz
);
2644 incr_last_active(s
, last
, esz
);
2647 ret
= load_last_active(s
, last
, rm
, esz
);
2648 tcg_temp_free_i32(last
);
2652 /* Compute LAST for a Vreg. */
2653 static bool do_last_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2655 if (sve_access_check(s
)) {
2656 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2657 write_fp_dreg(s
, a
->rd
, val
);
2658 tcg_temp_free_i64(val
);
2663 static bool trans_LASTA_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2665 return do_last_fp(s
, a
, false);
2668 static bool trans_LASTB_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2670 return do_last_fp(s
, a
, true);
2673 /* Compute LAST for a Xreg. */
2674 static bool do_last_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2676 if (sve_access_check(s
)) {
2677 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2678 tcg_gen_mov_i64(cpu_reg(s
, a
->rd
), val
);
2679 tcg_temp_free_i64(val
);
2684 static bool trans_LASTA_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2686 return do_last_general(s
, a
, false);
2689 static bool trans_LASTB_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2691 return do_last_general(s
, a
, true);
2694 static bool trans_CPY_m_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2696 if (sve_access_check(s
)) {
2697 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, cpu_reg_sp(s
, a
->rn
));
2702 static bool trans_CPY_m_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2704 if (sve_access_check(s
)) {
2705 int ofs
= vec_reg_offset(s
, a
->rn
, 0, a
->esz
);
2706 TCGv_i64 t
= load_esz(cpu_env
, ofs
, a
->esz
);
2707 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, t
);
2708 tcg_temp_free_i64(t
);
2713 static bool trans_REVB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2715 static gen_helper_gvec_3
* const fns
[4] = {
2717 gen_helper_sve_revb_h
,
2718 gen_helper_sve_revb_s
,
2719 gen_helper_sve_revb_d
,
2721 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2724 static bool trans_REVH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2726 static gen_helper_gvec_3
* const fns
[4] = {
2729 gen_helper_sve_revh_s
,
2730 gen_helper_sve_revh_d
,
2732 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2735 static bool trans_REVW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2737 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_revw_d
: NULL
);
2740 static bool trans_RBIT(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2742 static gen_helper_gvec_3
* const fns
[4] = {
2743 gen_helper_sve_rbit_b
,
2744 gen_helper_sve_rbit_h
,
2745 gen_helper_sve_rbit_s
,
2746 gen_helper_sve_rbit_d
,
2748 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2751 static bool trans_SPLICE(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
2753 if (sve_access_check(s
)) {
2754 unsigned vsz
= vec_full_reg_size(s
);
2755 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
2756 vec_full_reg_offset(s
, a
->rn
),
2757 vec_full_reg_offset(s
, a
->rm
),
2758 pred_full_reg_offset(s
, a
->pg
),
2759 vsz
, vsz
, a
->esz
, gen_helper_sve_splice
);
2765 *** SVE Integer Compare - Vectors Group
2768 static bool do_ppzz_flags(DisasContext
*s
, arg_rprr_esz
*a
,
2769 gen_helper_gvec_flags_4
*gen_fn
)
2771 TCGv_ptr pd
, zn
, zm
, pg
;
2775 if (gen_fn
== NULL
) {
2778 if (!sve_access_check(s
)) {
2782 vsz
= vec_full_reg_size(s
);
2783 t
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2784 pd
= tcg_temp_new_ptr();
2785 zn
= tcg_temp_new_ptr();
2786 zm
= tcg_temp_new_ptr();
2787 pg
= tcg_temp_new_ptr();
2789 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2790 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2791 tcg_gen_addi_ptr(zm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
2792 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2794 gen_fn(t
, pd
, zn
, zm
, pg
, t
);
2796 tcg_temp_free_ptr(pd
);
2797 tcg_temp_free_ptr(zn
);
2798 tcg_temp_free_ptr(zm
);
2799 tcg_temp_free_ptr(pg
);
2803 tcg_temp_free_i32(t
);
2807 #define DO_PPZZ(NAME, name) \
2808 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2811 static gen_helper_gvec_flags_4 * const fns[4] = { \
2812 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2813 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2815 return do_ppzz_flags(s, a, fns[a->esz]); \
2818 DO_PPZZ(CMPEQ
, cmpeq
)
2819 DO_PPZZ(CMPNE
, cmpne
)
2820 DO_PPZZ(CMPGT
, cmpgt
)
2821 DO_PPZZ(CMPGE
, cmpge
)
2822 DO_PPZZ(CMPHI
, cmphi
)
2823 DO_PPZZ(CMPHS
, cmphs
)
2827 #define DO_PPZW(NAME, name) \
2828 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2831 static gen_helper_gvec_flags_4 * const fns[4] = { \
2832 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2833 gen_helper_sve_##name##_ppzw_s, NULL \
2835 return do_ppzz_flags(s, a, fns[a->esz]); \
2838 DO_PPZW(CMPEQ
, cmpeq
)
2839 DO_PPZW(CMPNE
, cmpne
)
2840 DO_PPZW(CMPGT
, cmpgt
)
2841 DO_PPZW(CMPGE
, cmpge
)
2842 DO_PPZW(CMPHI
, cmphi
)
2843 DO_PPZW(CMPHS
, cmphs
)
2844 DO_PPZW(CMPLT
, cmplt
)
2845 DO_PPZW(CMPLE
, cmple
)
2846 DO_PPZW(CMPLO
, cmplo
)
2847 DO_PPZW(CMPLS
, cmpls
)
2852 *** SVE Integer Compare - Immediate Groups
2855 static bool do_ppzi_flags(DisasContext
*s
, arg_rpri_esz
*a
,
2856 gen_helper_gvec_flags_3
*gen_fn
)
2858 TCGv_ptr pd
, zn
, pg
;
2862 if (gen_fn
== NULL
) {
2865 if (!sve_access_check(s
)) {
2869 vsz
= vec_full_reg_size(s
);
2870 t
= tcg_const_i32(simd_desc(vsz
, vsz
, a
->imm
));
2871 pd
= tcg_temp_new_ptr();
2872 zn
= tcg_temp_new_ptr();
2873 pg
= tcg_temp_new_ptr();
2875 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2876 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2877 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2879 gen_fn(t
, pd
, zn
, pg
, t
);
2881 tcg_temp_free_ptr(pd
);
2882 tcg_temp_free_ptr(zn
);
2883 tcg_temp_free_ptr(pg
);
2887 tcg_temp_free_i32(t
);
2891 #define DO_PPZI(NAME, name) \
2892 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2895 static gen_helper_gvec_flags_3 * const fns[4] = { \
2896 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2897 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2899 return do_ppzi_flags(s, a, fns[a->esz]); \
2902 DO_PPZI(CMPEQ
, cmpeq
)
2903 DO_PPZI(CMPNE
, cmpne
)
2904 DO_PPZI(CMPGT
, cmpgt
)
2905 DO_PPZI(CMPGE
, cmpge
)
2906 DO_PPZI(CMPHI
, cmphi
)
2907 DO_PPZI(CMPHS
, cmphs
)
2908 DO_PPZI(CMPLT
, cmplt
)
2909 DO_PPZI(CMPLE
, cmple
)
2910 DO_PPZI(CMPLO
, cmplo
)
2911 DO_PPZI(CMPLS
, cmpls
)
2916 *** SVE Partition Break Group
2919 static bool do_brk3(DisasContext
*s
, arg_rprr_s
*a
,
2920 gen_helper_gvec_4
*fn
, gen_helper_gvec_flags_4
*fn_s
)
2922 if (!sve_access_check(s
)) {
2926 unsigned vsz
= pred_full_reg_size(s
);
2928 /* Predicate sizes may be smaller and cannot use simd_desc. */
2929 TCGv_ptr d
= tcg_temp_new_ptr();
2930 TCGv_ptr n
= tcg_temp_new_ptr();
2931 TCGv_ptr m
= tcg_temp_new_ptr();
2932 TCGv_ptr g
= tcg_temp_new_ptr();
2933 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2935 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2936 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2937 tcg_gen_addi_ptr(m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2938 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2941 fn_s(t
, d
, n
, m
, g
, t
);
2946 tcg_temp_free_ptr(d
);
2947 tcg_temp_free_ptr(n
);
2948 tcg_temp_free_ptr(m
);
2949 tcg_temp_free_ptr(g
);
2950 tcg_temp_free_i32(t
);
2954 static bool do_brk2(DisasContext
*s
, arg_rpr_s
*a
,
2955 gen_helper_gvec_3
*fn
, gen_helper_gvec_flags_3
*fn_s
)
2957 if (!sve_access_check(s
)) {
2961 unsigned vsz
= pred_full_reg_size(s
);
2963 /* Predicate sizes may be smaller and cannot use simd_desc. */
2964 TCGv_ptr d
= tcg_temp_new_ptr();
2965 TCGv_ptr n
= tcg_temp_new_ptr();
2966 TCGv_ptr g
= tcg_temp_new_ptr();
2967 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2969 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2970 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2971 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2974 fn_s(t
, d
, n
, g
, t
);
2979 tcg_temp_free_ptr(d
);
2980 tcg_temp_free_ptr(n
);
2981 tcg_temp_free_ptr(g
);
2982 tcg_temp_free_i32(t
);
2986 static bool trans_BRKPA(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
2988 return do_brk3(s
, a
, gen_helper_sve_brkpa
, gen_helper_sve_brkpas
);
2991 static bool trans_BRKPB(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
2993 return do_brk3(s
, a
, gen_helper_sve_brkpb
, gen_helper_sve_brkpbs
);
2996 static bool trans_BRKA_m(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
2998 return do_brk2(s
, a
, gen_helper_sve_brka_m
, gen_helper_sve_brkas_m
);
3001 static bool trans_BRKB_m(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
3003 return do_brk2(s
, a
, gen_helper_sve_brkb_m
, gen_helper_sve_brkbs_m
);
3006 static bool trans_BRKA_z(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
3008 return do_brk2(s
, a
, gen_helper_sve_brka_z
, gen_helper_sve_brkas_z
);
3011 static bool trans_BRKB_z(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
3013 return do_brk2(s
, a
, gen_helper_sve_brkb_z
, gen_helper_sve_brkbs_z
);
3016 static bool trans_BRKN(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
3018 return do_brk2(s
, a
, gen_helper_sve_brkn
, gen_helper_sve_brkns
);
3022 *** SVE Predicate Count Group
3025 static void do_cntp(DisasContext
*s
, TCGv_i64 val
, int esz
, int pn
, int pg
)
3027 unsigned psz
= pred_full_reg_size(s
);
3032 tcg_gen_ld_i64(val
, cpu_env
, pred_full_reg_offset(s
, pn
));
3034 TCGv_i64 g
= tcg_temp_new_i64();
3035 tcg_gen_ld_i64(g
, cpu_env
, pred_full_reg_offset(s
, pg
));
3036 tcg_gen_and_i64(val
, val
, g
);
3037 tcg_temp_free_i64(g
);
3040 /* Reduce the pred_esz_masks value simply to reduce the
3041 * size of the code generated here.
3043 psz_mask
= MAKE_64BIT_MASK(0, psz
* 8);
3044 tcg_gen_andi_i64(val
, val
, pred_esz_masks
[esz
] & psz_mask
);
3046 tcg_gen_ctpop_i64(val
, val
);
3048 TCGv_ptr t_pn
= tcg_temp_new_ptr();
3049 TCGv_ptr t_pg
= tcg_temp_new_ptr();
3054 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
3056 tcg_gen_addi_ptr(t_pn
, cpu_env
, pred_full_reg_offset(s
, pn
));
3057 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
3058 t_desc
= tcg_const_i32(desc
);
3060 gen_helper_sve_cntp(val
, t_pn
, t_pg
, t_desc
);
3061 tcg_temp_free_ptr(t_pn
);
3062 tcg_temp_free_ptr(t_pg
);
3063 tcg_temp_free_i32(t_desc
);
3067 static bool trans_CNTP(DisasContext
*s
, arg_CNTP
*a
, uint32_t insn
)
3069 if (sve_access_check(s
)) {
3070 do_cntp(s
, cpu_reg(s
, a
->rd
), a
->esz
, a
->rn
, a
->pg
);
3075 static bool trans_INCDECP_r(DisasContext
*s
, arg_incdec_pred
*a
,
3078 if (sve_access_check(s
)) {
3079 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3080 TCGv_i64 val
= tcg_temp_new_i64();
3082 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3084 tcg_gen_sub_i64(reg
, reg
, val
);
3086 tcg_gen_add_i64(reg
, reg
, val
);
3088 tcg_temp_free_i64(val
);
3093 static bool trans_INCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
,
3099 if (sve_access_check(s
)) {
3100 unsigned vsz
= vec_full_reg_size(s
);
3101 TCGv_i64 val
= tcg_temp_new_i64();
3102 GVecGen2sFn
*gvec_fn
= a
->d
? tcg_gen_gvec_subs
: tcg_gen_gvec_adds
;
3104 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3105 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3106 vec_full_reg_offset(s
, a
->rn
), val
, vsz
, vsz
);
3111 static bool trans_SINCDECP_r_32(DisasContext
*s
, arg_incdec_pred
*a
,
3114 if (sve_access_check(s
)) {
3115 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3116 TCGv_i64 val
= tcg_temp_new_i64();
3118 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3119 do_sat_addsub_32(reg
, val
, a
->u
, a
->d
);
3124 static bool trans_SINCDECP_r_64(DisasContext
*s
, arg_incdec_pred
*a
,
3127 if (sve_access_check(s
)) {
3128 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3129 TCGv_i64 val
= tcg_temp_new_i64();
3131 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3132 do_sat_addsub_64(reg
, val
, a
->u
, a
->d
);
3137 static bool trans_SINCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
,
3143 if (sve_access_check(s
)) {
3144 TCGv_i64 val
= tcg_temp_new_i64();
3145 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3146 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, a
->u
, a
->d
);
3152 *** SVE Integer Compare Scalars Group
3155 static bool trans_CTERM(DisasContext
*s
, arg_CTERM
*a
, uint32_t insn
)
3157 if (!sve_access_check(s
)) {
3161 TCGCond cond
= (a
->ne
? TCG_COND_NE
: TCG_COND_EQ
);
3162 TCGv_i64 rn
= read_cpu_reg(s
, a
->rn
, a
->sf
);
3163 TCGv_i64 rm
= read_cpu_reg(s
, a
->rm
, a
->sf
);
3164 TCGv_i64 cmp
= tcg_temp_new_i64();
3166 tcg_gen_setcond_i64(cond
, cmp
, rn
, rm
);
3167 tcg_gen_extrl_i64_i32(cpu_NF
, cmp
);
3168 tcg_temp_free_i64(cmp
);
3170 /* VF = !NF & !CF. */
3171 tcg_gen_xori_i32(cpu_VF
, cpu_NF
, 1);
3172 tcg_gen_andc_i32(cpu_VF
, cpu_VF
, cpu_CF
);
3174 /* Both NF and VF actually look at bit 31. */
3175 tcg_gen_neg_i32(cpu_NF
, cpu_NF
);
3176 tcg_gen_neg_i32(cpu_VF
, cpu_VF
);
3180 static bool trans_WHILE(DisasContext
*s
, arg_WHILE
*a
, uint32_t insn
)
3182 if (!sve_access_check(s
)) {
3186 TCGv_i64 op0
= read_cpu_reg(s
, a
->rn
, 1);
3187 TCGv_i64 op1
= read_cpu_reg(s
, a
->rm
, 1);
3188 TCGv_i64 t0
= tcg_temp_new_i64();
3189 TCGv_i64 t1
= tcg_temp_new_i64();
3192 unsigned desc
, vsz
= vec_full_reg_size(s
);
3197 tcg_gen_ext32u_i64(op0
, op0
);
3198 tcg_gen_ext32u_i64(op1
, op1
);
3200 tcg_gen_ext32s_i64(op0
, op0
);
3201 tcg_gen_ext32s_i64(op1
, op1
);
3205 /* For the helper, compress the different conditions into a computation
3206 * of how many iterations for which the condition is true.
3208 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3209 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3210 * aren't that large, so any value >= predicate size is sufficient.
3212 tcg_gen_sub_i64(t0
, op1
, op0
);
3214 /* t0 = MIN(op1 - op0, vsz). */
3215 tcg_gen_movi_i64(t1
, vsz
);
3216 tcg_gen_umin_i64(t0
, t0
, t1
);
3218 /* Equality means one more iteration. */
3219 tcg_gen_addi_i64(t0
, t0
, 1);
3222 /* t0 = (condition true ? t0 : 0). */
3224 ? (a
->eq
? TCG_COND_LEU
: TCG_COND_LTU
)
3225 : (a
->eq
? TCG_COND_LE
: TCG_COND_LT
));
3226 tcg_gen_movi_i64(t1
, 0);
3227 tcg_gen_movcond_i64(cond
, t0
, op0
, op1
, t0
, t1
);
3229 t2
= tcg_temp_new_i32();
3230 tcg_gen_extrl_i64_i32(t2
, t0
);
3231 tcg_temp_free_i64(t0
);
3232 tcg_temp_free_i64(t1
);
3234 desc
= (vsz
/ 8) - 2;
3235 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
3236 t3
= tcg_const_i32(desc
);
3238 ptr
= tcg_temp_new_ptr();
3239 tcg_gen_addi_ptr(ptr
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
3241 gen_helper_sve_while(t2
, ptr
, t2
, t3
);
3244 tcg_temp_free_ptr(ptr
);
3245 tcg_temp_free_i32(t2
);
3246 tcg_temp_free_i32(t3
);
3251 *** SVE Integer Wide Immediate - Unpredicated Group
3254 static bool trans_FDUP(DisasContext
*s
, arg_FDUP
*a
, uint32_t insn
)
3259 if (sve_access_check(s
)) {
3260 unsigned vsz
= vec_full_reg_size(s
);
3261 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3264 /* Decode the VFP immediate. */
3265 imm
= vfp_expand_imm(a
->esz
, a
->imm
);
3266 imm
= dup_const(a
->esz
, imm
);
3268 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, imm
);
3273 static bool trans_DUP_i(DisasContext
*s
, arg_DUP_i
*a
, uint32_t insn
)
3275 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3278 if (sve_access_check(s
)) {
3279 unsigned vsz
= vec_full_reg_size(s
);
3280 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3282 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, dup_const(a
->esz
, a
->imm
));
3287 static bool trans_ADD_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3289 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3292 if (sve_access_check(s
)) {
3293 unsigned vsz
= vec_full_reg_size(s
);
3294 tcg_gen_gvec_addi(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3295 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3300 static bool trans_SUB_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3303 return trans_ADD_zzi(s
, a
, insn
);
3306 static bool trans_SUBR_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3308 static const GVecGen2s op
[4] = {
3309 { .fni8
= tcg_gen_vec_sub8_i64
,
3310 .fniv
= tcg_gen_sub_vec
,
3311 .fno
= gen_helper_sve_subri_b
,
3312 .opc
= INDEX_op_sub_vec
,
3314 .scalar_first
= true },
3315 { .fni8
= tcg_gen_vec_sub16_i64
,
3316 .fniv
= tcg_gen_sub_vec
,
3317 .fno
= gen_helper_sve_subri_h
,
3318 .opc
= INDEX_op_sub_vec
,
3320 .scalar_first
= true },
3321 { .fni4
= tcg_gen_sub_i32
,
3322 .fniv
= tcg_gen_sub_vec
,
3323 .fno
= gen_helper_sve_subri_s
,
3324 .opc
= INDEX_op_sub_vec
,
3326 .scalar_first
= true },
3327 { .fni8
= tcg_gen_sub_i64
,
3328 .fniv
= tcg_gen_sub_vec
,
3329 .fno
= gen_helper_sve_subri_d
,
3330 .opc
= INDEX_op_sub_vec
,
3331 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
3333 .scalar_first
= true }
3336 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3339 if (sve_access_check(s
)) {
3340 unsigned vsz
= vec_full_reg_size(s
);
3341 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3342 tcg_gen_gvec_2s(vec_full_reg_offset(s
, a
->rd
),
3343 vec_full_reg_offset(s
, a
->rn
),
3344 vsz
, vsz
, c
, &op
[a
->esz
]);
3345 tcg_temp_free_i64(c
);
3350 static bool trans_MUL_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3352 if (sve_access_check(s
)) {
3353 unsigned vsz
= vec_full_reg_size(s
);
3354 tcg_gen_gvec_muli(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3355 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3360 static bool do_zzi_sat(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
,
3363 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3366 if (sve_access_check(s
)) {
3367 TCGv_i64 val
= tcg_const_i64(a
->imm
);
3368 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, u
, d
);
3369 tcg_temp_free_i64(val
);
3374 static bool trans_SQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3376 return do_zzi_sat(s
, a
, insn
, false, false);
3379 static bool trans_UQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3381 return do_zzi_sat(s
, a
, insn
, true, false);
3384 static bool trans_SQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3386 return do_zzi_sat(s
, a
, insn
, false, true);
3389 static bool trans_UQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3391 return do_zzi_sat(s
, a
, insn
, true, true);
3394 static bool do_zzi_ool(DisasContext
*s
, arg_rri_esz
*a
, gen_helper_gvec_2i
*fn
)
3396 if (sve_access_check(s
)) {
3397 unsigned vsz
= vec_full_reg_size(s
);
3398 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3400 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
3401 vec_full_reg_offset(s
, a
->rn
),
3402 c
, vsz
, vsz
, 0, fn
);
3403 tcg_temp_free_i64(c
);
3408 #define DO_ZZI(NAME, name) \
3409 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3412 static gen_helper_gvec_2i * const fns[4] = { \
3413 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3414 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3416 return do_zzi_ool(s, a, fns[a->esz]); \
3427 *** SVE Floating Point Multiply-Add Indexed Group
3430 static bool trans_FMLA_zzxz(DisasContext
*s
, arg_FMLA_zzxz
*a
, uint32_t insn
)
3432 static gen_helper_gvec_4_ptr
* const fns
[3] = {
3433 gen_helper_gvec_fmla_idx_h
,
3434 gen_helper_gvec_fmla_idx_s
,
3435 gen_helper_gvec_fmla_idx_d
,
3438 if (sve_access_check(s
)) {
3439 unsigned vsz
= vec_full_reg_size(s
);
3440 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3441 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3442 vec_full_reg_offset(s
, a
->rn
),
3443 vec_full_reg_offset(s
, a
->rm
),
3444 vec_full_reg_offset(s
, a
->ra
),
3445 status
, vsz
, vsz
, (a
->index
<< 1) | a
->sub
,
3447 tcg_temp_free_ptr(status
);
3453 *** SVE Floating Point Multiply Indexed Group
3456 static bool trans_FMUL_zzx(DisasContext
*s
, arg_FMUL_zzx
*a
, uint32_t insn
)
3458 static gen_helper_gvec_3_ptr
* const fns
[3] = {
3459 gen_helper_gvec_fmul_idx_h
,
3460 gen_helper_gvec_fmul_idx_s
,
3461 gen_helper_gvec_fmul_idx_d
,
3464 if (sve_access_check(s
)) {
3465 unsigned vsz
= vec_full_reg_size(s
);
3466 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3467 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3468 vec_full_reg_offset(s
, a
->rn
),
3469 vec_full_reg_offset(s
, a
->rm
),
3470 status
, vsz
, vsz
, a
->index
, fns
[a
->esz
- 1]);
3471 tcg_temp_free_ptr(status
);
3477 *** SVE Floating Point Fast Reduction Group
3480 typedef void gen_helper_fp_reduce(TCGv_i64
, TCGv_ptr
, TCGv_ptr
,
3481 TCGv_ptr
, TCGv_i32
);
3483 static void do_reduce(DisasContext
*s
, arg_rpr_esz
*a
,
3484 gen_helper_fp_reduce
*fn
)
3486 unsigned vsz
= vec_full_reg_size(s
);
3487 unsigned p2vsz
= pow2ceil(vsz
);
3488 TCGv_i32 t_desc
= tcg_const_i32(simd_desc(vsz
, p2vsz
, 0));
3489 TCGv_ptr t_zn
, t_pg
, status
;
3492 temp
= tcg_temp_new_i64();
3493 t_zn
= tcg_temp_new_ptr();
3494 t_pg
= tcg_temp_new_ptr();
3496 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
3497 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3498 status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3500 fn(temp
, t_zn
, t_pg
, status
, t_desc
);
3501 tcg_temp_free_ptr(t_zn
);
3502 tcg_temp_free_ptr(t_pg
);
3503 tcg_temp_free_ptr(status
);
3504 tcg_temp_free_i32(t_desc
);
3506 write_fp_dreg(s
, a
->rd
, temp
);
3507 tcg_temp_free_i64(temp
);
3510 #define DO_VPZ(NAME, name) \
3511 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3513 static gen_helper_fp_reduce * const fns[3] = { \
3514 gen_helper_sve_##name##_h, \
3515 gen_helper_sve_##name##_s, \
3516 gen_helper_sve_##name##_d, \
3518 if (a->esz == 0) { \
3521 if (sve_access_check(s)) { \
3522 do_reduce(s, a, fns[a->esz - 1]); \
3527 DO_VPZ(FADDV
, faddv
)
3528 DO_VPZ(FMINNMV
, fminnmv
)
3529 DO_VPZ(FMAXNMV
, fmaxnmv
)
3530 DO_VPZ(FMINV
, fminv
)
3531 DO_VPZ(FMAXV
, fmaxv
)
3534 *** SVE Floating Point Unary Operations - Unpredicated Group
3537 static void do_zz_fp(DisasContext
*s
, arg_rr_esz
*a
, gen_helper_gvec_2_ptr
*fn
)
3539 unsigned vsz
= vec_full_reg_size(s
);
3540 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3542 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s
, a
->rd
),
3543 vec_full_reg_offset(s
, a
->rn
),
3544 status
, vsz
, vsz
, 0, fn
);
3545 tcg_temp_free_ptr(status
);
3548 static bool trans_FRECPE(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
3550 static gen_helper_gvec_2_ptr
* const fns
[3] = {
3551 gen_helper_gvec_frecpe_h
,
3552 gen_helper_gvec_frecpe_s
,
3553 gen_helper_gvec_frecpe_d
,
3558 if (sve_access_check(s
)) {
3559 do_zz_fp(s
, a
, fns
[a
->esz
- 1]);
3564 static bool trans_FRSQRTE(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
3566 static gen_helper_gvec_2_ptr
* const fns
[3] = {
3567 gen_helper_gvec_frsqrte_h
,
3568 gen_helper_gvec_frsqrte_s
,
3569 gen_helper_gvec_frsqrte_d
,
3574 if (sve_access_check(s
)) {
3575 do_zz_fp(s
, a
, fns
[a
->esz
- 1]);
3581 *** SVE Floating Point Compare with Zero Group
3584 static void do_ppz_fp(DisasContext
*s
, arg_rpr_esz
*a
,
3585 gen_helper_gvec_3_ptr
*fn
)
3587 unsigned vsz
= vec_full_reg_size(s
);
3588 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3590 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s
, a
->rd
),
3591 vec_full_reg_offset(s
, a
->rn
),
3592 pred_full_reg_offset(s
, a
->pg
),
3593 status
, vsz
, vsz
, 0, fn
);
3594 tcg_temp_free_ptr(status
);
3597 #define DO_PPZ(NAME, name) \
3598 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3600 static gen_helper_gvec_3_ptr * const fns[3] = { \
3601 gen_helper_sve_##name##_h, \
3602 gen_helper_sve_##name##_s, \
3603 gen_helper_sve_##name##_d, \
3605 if (a->esz == 0) { \
3608 if (sve_access_check(s)) { \
3609 do_ppz_fp(s, a, fns[a->esz - 1]); \
3614 DO_PPZ(FCMGE_ppz0
, fcmge0
)
3615 DO_PPZ(FCMGT_ppz0
, fcmgt0
)
3616 DO_PPZ(FCMLE_ppz0
, fcmle0
)
3617 DO_PPZ(FCMLT_ppz0
, fcmlt0
)
3618 DO_PPZ(FCMEQ_ppz0
, fcmeq0
)
3619 DO_PPZ(FCMNE_ppz0
, fcmne0
)
3624 *** SVE floating-point trig multiply-add coefficient
3627 static bool trans_FTMAD(DisasContext
*s
, arg_FTMAD
*a
, uint32_t insn
)
3629 static gen_helper_gvec_3_ptr
* const fns
[3] = {
3630 gen_helper_sve_ftmad_h
,
3631 gen_helper_sve_ftmad_s
,
3632 gen_helper_sve_ftmad_d
,
3638 if (sve_access_check(s
)) {
3639 unsigned vsz
= vec_full_reg_size(s
);
3640 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3641 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3642 vec_full_reg_offset(s
, a
->rn
),
3643 vec_full_reg_offset(s
, a
->rm
),
3644 status
, vsz
, vsz
, a
->imm
, fns
[a
->esz
- 1]);
3645 tcg_temp_free_ptr(status
);
3651 *** SVE Floating Point Accumulating Reduction Group
3654 static bool trans_FADDA(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
3656 typedef void fadda_fn(TCGv_i64
, TCGv_i64
, TCGv_ptr
,
3657 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
3658 static fadda_fn
* const fns
[3] = {
3659 gen_helper_sve_fadda_h
,
3660 gen_helper_sve_fadda_s
,
3661 gen_helper_sve_fadda_d
,
3663 unsigned vsz
= vec_full_reg_size(s
);
3664 TCGv_ptr t_rm
, t_pg
, t_fpst
;
3671 if (!sve_access_check(s
)) {
3675 t_val
= load_esz(cpu_env
, vec_reg_offset(s
, a
->rn
, 0, a
->esz
), a
->esz
);
3676 t_rm
= tcg_temp_new_ptr();
3677 t_pg
= tcg_temp_new_ptr();
3678 tcg_gen_addi_ptr(t_rm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
3679 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3680 t_fpst
= get_fpstatus_ptr(a
->esz
== MO_16
);
3681 t_desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3683 fns
[a
->esz
- 1](t_val
, t_val
, t_rm
, t_pg
, t_fpst
, t_desc
);
3685 tcg_temp_free_i32(t_desc
);
3686 tcg_temp_free_ptr(t_fpst
);
3687 tcg_temp_free_ptr(t_pg
);
3688 tcg_temp_free_ptr(t_rm
);
3690 write_fp_dreg(s
, a
->rd
, t_val
);
3691 tcg_temp_free_i64(t_val
);
3696 *** SVE Floating Point Arithmetic - Unpredicated Group
3699 static bool do_zzz_fp(DisasContext
*s
, arg_rrr_esz
*a
,
3700 gen_helper_gvec_3_ptr
*fn
)
3705 if (sve_access_check(s
)) {
3706 unsigned vsz
= vec_full_reg_size(s
);
3707 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3708 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3709 vec_full_reg_offset(s
, a
->rn
),
3710 vec_full_reg_offset(s
, a
->rm
),
3711 status
, vsz
, vsz
, 0, fn
);
3712 tcg_temp_free_ptr(status
);
3718 #define DO_FP3(NAME, name) \
3719 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3721 static gen_helper_gvec_3_ptr * const fns[4] = { \
3722 NULL, gen_helper_gvec_##name##_h, \
3723 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3725 return do_zzz_fp(s, a, fns[a->esz]); \
3728 DO_FP3(FADD_zzz
, fadd
)
3729 DO_FP3(FSUB_zzz
, fsub
)
3730 DO_FP3(FMUL_zzz
, fmul
)
3731 DO_FP3(FTSMUL
, ftsmul
)
3732 DO_FP3(FRECPS
, recps
)
3733 DO_FP3(FRSQRTS
, rsqrts
)
3738 *** SVE Floating Point Arithmetic - Predicated Group
3741 static bool do_zpzz_fp(DisasContext
*s
, arg_rprr_esz
*a
,
3742 gen_helper_gvec_4_ptr
*fn
)
3747 if (sve_access_check(s
)) {
3748 unsigned vsz
= vec_full_reg_size(s
);
3749 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3750 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3751 vec_full_reg_offset(s
, a
->rn
),
3752 vec_full_reg_offset(s
, a
->rm
),
3753 pred_full_reg_offset(s
, a
->pg
),
3754 status
, vsz
, vsz
, 0, fn
);
3755 tcg_temp_free_ptr(status
);
3760 #define DO_FP3(NAME, name) \
3761 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3763 static gen_helper_gvec_4_ptr * const fns[4] = { \
3764 NULL, gen_helper_sve_##name##_h, \
3765 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3767 return do_zpzz_fp(s, a, fns[a->esz]); \
3770 DO_FP3(FADD_zpzz
, fadd
)
3771 DO_FP3(FSUB_zpzz
, fsub
)
3772 DO_FP3(FMUL_zpzz
, fmul
)
3773 DO_FP3(FMIN_zpzz
, fmin
)
3774 DO_FP3(FMAX_zpzz
, fmax
)
3775 DO_FP3(FMINNM_zpzz
, fminnum
)
3776 DO_FP3(FMAXNM_zpzz
, fmaxnum
)
3778 DO_FP3(FSCALE
, fscalbn
)
3780 DO_FP3(FMULX
, fmulx
)
3784 typedef void gen_helper_sve_fp2scalar(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
,
3785 TCGv_i64
, TCGv_ptr
, TCGv_i32
);
3787 static void do_fp_scalar(DisasContext
*s
, int zd
, int zn
, int pg
, bool is_fp16
,
3788 TCGv_i64 scalar
, gen_helper_sve_fp2scalar
*fn
)
3790 unsigned vsz
= vec_full_reg_size(s
);
3791 TCGv_ptr t_zd
, t_zn
, t_pg
, status
;
3794 t_zd
= tcg_temp_new_ptr();
3795 t_zn
= tcg_temp_new_ptr();
3796 t_pg
= tcg_temp_new_ptr();
3797 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, zd
));
3798 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, zn
));
3799 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
3801 status
= get_fpstatus_ptr(is_fp16
);
3802 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3803 fn(t_zd
, t_zn
, t_pg
, scalar
, status
, desc
);
3805 tcg_temp_free_i32(desc
);
3806 tcg_temp_free_ptr(status
);
3807 tcg_temp_free_ptr(t_pg
);
3808 tcg_temp_free_ptr(t_zn
);
3809 tcg_temp_free_ptr(t_zd
);
3812 static void do_fp_imm(DisasContext
*s
, arg_rpri_esz
*a
, uint64_t imm
,
3813 gen_helper_sve_fp2scalar
*fn
)
3815 TCGv_i64 temp
= tcg_const_i64(imm
);
3816 do_fp_scalar(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, temp
, fn
);
3817 tcg_temp_free_i64(temp
);
3820 #define DO_FP_IMM(NAME, name, const0, const1) \
3821 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a, \
3824 static gen_helper_sve_fp2scalar * const fns[3] = { \
3825 gen_helper_sve_##name##_h, \
3826 gen_helper_sve_##name##_s, \
3827 gen_helper_sve_##name##_d \
3829 static uint64_t const val[3][2] = { \
3830 { float16_##const0, float16_##const1 }, \
3831 { float32_##const0, float32_##const1 }, \
3832 { float64_##const0, float64_##const1 }, \
3834 if (a->esz == 0) { \
3837 if (sve_access_check(s)) { \
3838 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3843 #define float16_two make_float16(0x4000)
3844 #define float32_two make_float32(0x40000000)
3845 #define float64_two make_float64(0x4000000000000000ULL)
3847 DO_FP_IMM(FADD
, fadds
, half
, one
)
3848 DO_FP_IMM(FSUB
, fsubs
, half
, one
)
3849 DO_FP_IMM(FMUL
, fmuls
, half
, two
)
3850 DO_FP_IMM(FSUBR
, fsubrs
, half
, one
)
3851 DO_FP_IMM(FMAXNM
, fmaxnms
, zero
, one
)
3852 DO_FP_IMM(FMINNM
, fminnms
, zero
, one
)
3853 DO_FP_IMM(FMAX
, fmaxs
, zero
, one
)
3854 DO_FP_IMM(FMIN
, fmins
, zero
, one
)
3858 static bool do_fp_cmp(DisasContext
*s
, arg_rprr_esz
*a
,
3859 gen_helper_gvec_4_ptr
*fn
)
3864 if (sve_access_check(s
)) {
3865 unsigned vsz
= vec_full_reg_size(s
);
3866 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3867 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s
, a
->rd
),
3868 vec_full_reg_offset(s
, a
->rn
),
3869 vec_full_reg_offset(s
, a
->rm
),
3870 pred_full_reg_offset(s
, a
->pg
),
3871 status
, vsz
, vsz
, 0, fn
);
3872 tcg_temp_free_ptr(status
);
3877 #define DO_FPCMP(NAME, name) \
3878 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
3881 static gen_helper_gvec_4_ptr * const fns[4] = { \
3882 NULL, gen_helper_sve_##name##_h, \
3883 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3885 return do_fp_cmp(s, a, fns[a->esz]); \
3888 DO_FPCMP(FCMGE
, fcmge
)
3889 DO_FPCMP(FCMGT
, fcmgt
)
3890 DO_FPCMP(FCMEQ
, fcmeq
)
3891 DO_FPCMP(FCMNE
, fcmne
)
3892 DO_FPCMP(FCMUO
, fcmuo
)
3893 DO_FPCMP(FACGE
, facge
)
3894 DO_FPCMP(FACGT
, facgt
)
3898 static bool trans_FCADD(DisasContext
*s
, arg_FCADD
*a
, uint32_t insn
)
3900 static gen_helper_gvec_4_ptr
* const fns
[3] = {
3901 gen_helper_sve_fcadd_h
,
3902 gen_helper_sve_fcadd_s
,
3903 gen_helper_sve_fcadd_d
3909 if (sve_access_check(s
)) {
3910 unsigned vsz
= vec_full_reg_size(s
);
3911 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3912 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3913 vec_full_reg_offset(s
, a
->rn
),
3914 vec_full_reg_offset(s
, a
->rm
),
3915 pred_full_reg_offset(s
, a
->pg
),
3916 status
, vsz
, vsz
, a
->rot
, fns
[a
->esz
- 1]);
3917 tcg_temp_free_ptr(status
);
3922 typedef void gen_helper_sve_fmla(TCGv_env
, TCGv_ptr
, TCGv_i32
);
3924 static bool do_fmla(DisasContext
*s
, arg_rprrr_esz
*a
, gen_helper_sve_fmla
*fn
)
3929 if (!sve_access_check(s
)) {
3933 unsigned vsz
= vec_full_reg_size(s
);
3936 TCGv_ptr pg
= tcg_temp_new_ptr();
3938 /* We would need 7 operands to pass these arguments "properly".
3939 * So we encode all the register numbers into the descriptor.
3941 desc
= deposit32(a
->rd
, 5, 5, a
->rn
);
3942 desc
= deposit32(desc
, 10, 5, a
->rm
);
3943 desc
= deposit32(desc
, 15, 5, a
->ra
);
3944 desc
= simd_desc(vsz
, vsz
, desc
);
3946 t_desc
= tcg_const_i32(desc
);
3947 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3948 fn(cpu_env
, pg
, t_desc
);
3949 tcg_temp_free_i32(t_desc
);
3950 tcg_temp_free_ptr(pg
);
3954 #define DO_FMLA(NAME, name) \
3955 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3957 static gen_helper_sve_fmla * const fns[4] = { \
3958 NULL, gen_helper_sve_##name##_h, \
3959 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3961 return do_fmla(s, a, fns[a->esz]); \
3964 DO_FMLA(FMLA_zpzzz
, fmla_zpzzz
)
3965 DO_FMLA(FMLS_zpzzz
, fmls_zpzzz
)
3966 DO_FMLA(FNMLA_zpzzz
, fnmla_zpzzz
)
3967 DO_FMLA(FNMLS_zpzzz
, fnmls_zpzzz
)
3971 static bool trans_FCMLA_zpzzz(DisasContext
*s
,
3972 arg_FCMLA_zpzzz
*a
, uint32_t insn
)
3974 static gen_helper_sve_fmla
* const fns
[3] = {
3975 gen_helper_sve_fcmla_zpzzz_h
,
3976 gen_helper_sve_fcmla_zpzzz_s
,
3977 gen_helper_sve_fcmla_zpzzz_d
,
3983 if (sve_access_check(s
)) {
3984 unsigned vsz
= vec_full_reg_size(s
);
3987 TCGv_ptr pg
= tcg_temp_new_ptr();
3989 /* We would need 7 operands to pass these arguments "properly".
3990 * So we encode all the register numbers into the descriptor.
3992 desc
= deposit32(a
->rd
, 5, 5, a
->rn
);
3993 desc
= deposit32(desc
, 10, 5, a
->rm
);
3994 desc
= deposit32(desc
, 15, 5, a
->ra
);
3995 desc
= deposit32(desc
, 20, 2, a
->rot
);
3996 desc
= sextract32(desc
, 0, 22);
3997 desc
= simd_desc(vsz
, vsz
, desc
);
3999 t_desc
= tcg_const_i32(desc
);
4000 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
4001 fns
[a
->esz
- 1](cpu_env
, pg
, t_desc
);
4002 tcg_temp_free_i32(t_desc
);
4003 tcg_temp_free_ptr(pg
);
4009 *** SVE Floating Point Unary Operations Predicated Group
4012 static bool do_zpz_ptr(DisasContext
*s
, int rd
, int rn
, int pg
,
4013 bool is_fp16
, gen_helper_gvec_3_ptr
*fn
)
4015 if (sve_access_check(s
)) {
4016 unsigned vsz
= vec_full_reg_size(s
);
4017 TCGv_ptr status
= get_fpstatus_ptr(is_fp16
);
4018 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, rd
),
4019 vec_full_reg_offset(s
, rn
),
4020 pred_full_reg_offset(s
, pg
),
4021 status
, vsz
, vsz
, 0, fn
);
4022 tcg_temp_free_ptr(status
);
4027 static bool trans_FCVT_sh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4029 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvt_sh
);
4032 static bool trans_FCVT_hs(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4034 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_hs
);
4037 static bool trans_FCVT_dh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4039 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvt_dh
);
4042 static bool trans_FCVT_hd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4044 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_hd
);
4047 static bool trans_FCVT_ds(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4049 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_ds
);
4052 static bool trans_FCVT_sd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4054 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvt_sd
);
4057 static bool trans_FCVTZS_hh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4059 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzs_hh
);
4062 static bool trans_FCVTZU_hh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4064 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzu_hh
);
4067 static bool trans_FCVTZS_hs(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4069 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzs_hs
);
4072 static bool trans_FCVTZU_hs(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4074 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzu_hs
);
4077 static bool trans_FCVTZS_hd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4079 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzs_hd
);
4082 static bool trans_FCVTZU_hd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4084 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_fcvtzu_hd
);
4087 static bool trans_FCVTZS_ss(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4089 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_ss
);
4092 static bool trans_FCVTZU_ss(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4094 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_ss
);
4097 static bool trans_FCVTZS_sd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4099 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_sd
);
4102 static bool trans_FCVTZU_sd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4104 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_sd
);
4107 static bool trans_FCVTZS_ds(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4109 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_ds
);
4112 static bool trans_FCVTZU_ds(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4114 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_ds
);
4117 static bool trans_FCVTZS_dd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4119 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzs_dd
);
4122 static bool trans_FCVTZU_dd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4124 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_fcvtzu_dd
);
4127 static gen_helper_gvec_3_ptr
* const frint_fns
[3] = {
4128 gen_helper_sve_frint_h
,
4129 gen_helper_sve_frint_s
,
4130 gen_helper_sve_frint_d
4133 static bool trans_FRINTI(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4138 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
,
4139 frint_fns
[a
->esz
- 1]);
4142 static bool trans_FRINTX(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4144 static gen_helper_gvec_3_ptr
* const fns
[3] = {
4145 gen_helper_sve_frintx_h
,
4146 gen_helper_sve_frintx_s
,
4147 gen_helper_sve_frintx_d
4152 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, fns
[a
->esz
- 1]);
4155 static bool do_frint_mode(DisasContext
*s
, arg_rpr_esz
*a
, int mode
)
4160 if (sve_access_check(s
)) {
4161 unsigned vsz
= vec_full_reg_size(s
);
4162 TCGv_i32 tmode
= tcg_const_i32(mode
);
4163 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
4165 gen_helper_set_rmode(tmode
, tmode
, status
);
4167 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
4168 vec_full_reg_offset(s
, a
->rn
),
4169 pred_full_reg_offset(s
, a
->pg
),
4170 status
, vsz
, vsz
, 0, frint_fns
[a
->esz
- 1]);
4172 gen_helper_set_rmode(tmode
, tmode
, status
);
4173 tcg_temp_free_i32(tmode
);
4174 tcg_temp_free_ptr(status
);
4179 static bool trans_FRINTN(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4181 return do_frint_mode(s
, a
, float_round_nearest_even
);
4184 static bool trans_FRINTP(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4186 return do_frint_mode(s
, a
, float_round_up
);
4189 static bool trans_FRINTM(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4191 return do_frint_mode(s
, a
, float_round_down
);
4194 static bool trans_FRINTZ(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4196 return do_frint_mode(s
, a
, float_round_to_zero
);
4199 static bool trans_FRINTA(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4201 return do_frint_mode(s
, a
, float_round_ties_away
);
4204 static bool trans_FRECPX(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4206 static gen_helper_gvec_3_ptr
* const fns
[3] = {
4207 gen_helper_sve_frecpx_h
,
4208 gen_helper_sve_frecpx_s
,
4209 gen_helper_sve_frecpx_d
4214 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, fns
[a
->esz
- 1]);
4217 static bool trans_FSQRT(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4219 static gen_helper_gvec_3_ptr
* const fns
[3] = {
4220 gen_helper_sve_fsqrt_h
,
4221 gen_helper_sve_fsqrt_s
,
4222 gen_helper_sve_fsqrt_d
4227 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, fns
[a
->esz
- 1]);
4230 static bool trans_SCVTF_hh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4232 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_hh
);
4235 static bool trans_SCVTF_sh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4237 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_sh
);
4240 static bool trans_SCVTF_dh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4242 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_dh
);
4245 static bool trans_SCVTF_ss(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4247 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_ss
);
4250 static bool trans_SCVTF_ds(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4252 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_ds
);
4255 static bool trans_SCVTF_sd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4257 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_sd
);
4260 static bool trans_SCVTF_dd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4262 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_dd
);
4265 static bool trans_UCVTF_hh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4267 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_hh
);
4270 static bool trans_UCVTF_sh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4272 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_sh
);
4275 static bool trans_UCVTF_dh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4277 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_dh
);
4280 static bool trans_UCVTF_ss(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4282 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_ss
);
4285 static bool trans_UCVTF_ds(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4287 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_ds
);
4290 static bool trans_UCVTF_sd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4292 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_sd
);
4295 static bool trans_UCVTF_dd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
4297 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_dd
);
4301 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4304 /* Subroutine loading a vector register at VOFS of LEN bytes.
4305 * The load should begin at the address Rn + IMM.
4308 static void do_ldr(DisasContext
*s
, uint32_t vofs
, uint32_t len
,
4311 uint32_t len_align
= QEMU_ALIGN_DOWN(len
, 8);
4312 uint32_t len_remain
= len
% 8;
4313 uint32_t nparts
= len
/ 8 + ctpop8(len_remain
);
4314 int midx
= get_mem_index(s
);
4315 TCGv_i64 addr
, t0
, t1
;
4317 addr
= tcg_temp_new_i64();
4318 t0
= tcg_temp_new_i64();
4320 /* Note that unpredicated load/store of vector/predicate registers
4321 * are defined as a stream of bytes, which equates to little-endian
4322 * operations on larger quantities. There is no nice way to force
4323 * a little-endian load for aarch64_be-linux-user out of line.
4325 * Attempt to keep code expansion to a minimum by limiting the
4326 * amount of unrolling done.
4331 for (i
= 0; i
< len_align
; i
+= 8) {
4332 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ i
);
4333 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
4334 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ i
);
4337 TCGLabel
*loop
= gen_new_label();
4338 TCGv_ptr tp
, i
= tcg_const_local_ptr(0);
4340 gen_set_label(loop
);
4342 /* Minimize the number of local temps that must be re-read from
4343 * the stack each iteration. Instead, re-compute values other
4344 * than the loop counter.
4346 tp
= tcg_temp_new_ptr();
4347 tcg_gen_addi_ptr(tp
, i
, imm
);
4348 tcg_gen_extu_ptr_i64(addr
, tp
);
4349 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, rn
));
4351 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
4353 tcg_gen_add_ptr(tp
, cpu_env
, i
);
4354 tcg_gen_addi_ptr(i
, i
, 8);
4355 tcg_gen_st_i64(t0
, tp
, vofs
);
4356 tcg_temp_free_ptr(tp
);
4358 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
4359 tcg_temp_free_ptr(i
);
4362 /* Predicate register loads can be any multiple of 2.
4363 * Note that we still store the entire 64-bit unit into cpu_env.
4366 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ len_align
);
4368 switch (len_remain
) {
4372 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LE
| ctz32(len_remain
));
4376 t1
= tcg_temp_new_i64();
4377 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEUL
);
4378 tcg_gen_addi_i64(addr
, addr
, 4);
4379 tcg_gen_qemu_ld_i64(t1
, addr
, midx
, MO_LEUW
);
4380 tcg_gen_deposit_i64(t0
, t0
, t1
, 32, 32);
4381 tcg_temp_free_i64(t1
);
4385 g_assert_not_reached();
4387 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ len_align
);
4389 tcg_temp_free_i64(addr
);
4390 tcg_temp_free_i64(t0
);
4393 /* Similarly for stores. */
4394 static void do_str(DisasContext
*s
, uint32_t vofs
, uint32_t len
,
4397 uint32_t len_align
= QEMU_ALIGN_DOWN(len
, 8);
4398 uint32_t len_remain
= len
% 8;
4399 uint32_t nparts
= len
/ 8 + ctpop8(len_remain
);
4400 int midx
= get_mem_index(s
);
4403 addr
= tcg_temp_new_i64();
4404 t0
= tcg_temp_new_i64();
4406 /* Note that unpredicated load/store of vector/predicate registers
4407 * are defined as a stream of bytes, which equates to little-endian
4408 * operations on larger quantities. There is no nice way to force
4409 * a little-endian store for aarch64_be-linux-user out of line.
4411 * Attempt to keep code expansion to a minimum by limiting the
4412 * amount of unrolling done.
4417 for (i
= 0; i
< len_align
; i
+= 8) {
4418 tcg_gen_ld_i64(t0
, cpu_env
, vofs
+ i
);
4419 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ i
);
4420 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEQ
);
4423 TCGLabel
*loop
= gen_new_label();
4424 TCGv_ptr t2
, i
= tcg_const_local_ptr(0);
4426 gen_set_label(loop
);
4428 t2
= tcg_temp_new_ptr();
4429 tcg_gen_add_ptr(t2
, cpu_env
, i
);
4430 tcg_gen_ld_i64(t0
, t2
, vofs
);
4432 /* Minimize the number of local temps that must be re-read from
4433 * the stack each iteration. Instead, re-compute values other
4434 * than the loop counter.
4436 tcg_gen_addi_ptr(t2
, i
, imm
);
4437 tcg_gen_extu_ptr_i64(addr
, t2
);
4438 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, rn
));
4439 tcg_temp_free_ptr(t2
);
4441 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEQ
);
4443 tcg_gen_addi_ptr(i
, i
, 8);
4445 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
4446 tcg_temp_free_ptr(i
);
4449 /* Predicate register stores can be any multiple of 2. */
4451 tcg_gen_ld_i64(t0
, cpu_env
, vofs
+ len_align
);
4452 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ len_align
);
4454 switch (len_remain
) {
4458 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LE
| ctz32(len_remain
));
4462 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEUL
);
4463 tcg_gen_addi_i64(addr
, addr
, 4);
4464 tcg_gen_shri_i64(t0
, t0
, 32);
4465 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEUW
);
4469 g_assert_not_reached();
4472 tcg_temp_free_i64(addr
);
4473 tcg_temp_free_i64(t0
);
4476 static bool trans_LDR_zri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4478 if (sve_access_check(s
)) {
4479 int size
= vec_full_reg_size(s
);
4480 int off
= vec_full_reg_offset(s
, a
->rd
);
4481 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
4486 static bool trans_LDR_pri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4488 if (sve_access_check(s
)) {
4489 int size
= pred_full_reg_size(s
);
4490 int off
= pred_full_reg_offset(s
, a
->rd
);
4491 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
4496 static bool trans_STR_zri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4498 if (sve_access_check(s
)) {
4499 int size
= vec_full_reg_size(s
);
4500 int off
= vec_full_reg_offset(s
, a
->rd
);
4501 do_str(s
, off
, size
, a
->rn
, a
->imm
* size
);
4506 static bool trans_STR_pri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4508 if (sve_access_check(s
)) {
4509 int size
= pred_full_reg_size(s
);
4510 int off
= pred_full_reg_offset(s
, a
->rd
);
4511 do_str(s
, off
, size
, a
->rn
, a
->imm
* size
);
4517 *** SVE Memory - Contiguous Load Group
4520 /* The memory mode of the dtype. */
4521 static const TCGMemOp dtype_mop
[16] = {
4522 MO_UB
, MO_UB
, MO_UB
, MO_UB
,
4523 MO_SL
, MO_UW
, MO_UW
, MO_UW
,
4524 MO_SW
, MO_SW
, MO_UL
, MO_UL
,
4525 MO_SB
, MO_SB
, MO_SB
, MO_Q
4528 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4530 /* The vector element size of dtype. */
4531 static const uint8_t dtype_esz
[16] = {
4538 static void do_mem_zpa(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
,
4539 gen_helper_gvec_mem
*fn
)
4541 unsigned vsz
= vec_full_reg_size(s
);
4545 /* For e.g. LD4, there are not enough arguments to pass all 4
4546 * registers as pointers, so encode the regno into the data field.
4547 * For consistency, do this even for LD1.
4549 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, zt
));
4550 t_pg
= tcg_temp_new_ptr();
4552 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4553 fn(cpu_env
, t_pg
, addr
, desc
);
4555 tcg_temp_free_ptr(t_pg
);
4556 tcg_temp_free_i32(desc
);
4559 static void do_ld_zpa(DisasContext
*s
, int zt
, int pg
,
4560 TCGv_i64 addr
, int dtype
, int nreg
)
4562 static gen_helper_gvec_mem
* const fns
[16][4] = {
4563 { gen_helper_sve_ld1bb_r
, gen_helper_sve_ld2bb_r
,
4564 gen_helper_sve_ld3bb_r
, gen_helper_sve_ld4bb_r
},
4565 { gen_helper_sve_ld1bhu_r
, NULL
, NULL
, NULL
},
4566 { gen_helper_sve_ld1bsu_r
, NULL
, NULL
, NULL
},
4567 { gen_helper_sve_ld1bdu_r
, NULL
, NULL
, NULL
},
4569 { gen_helper_sve_ld1sds_r
, NULL
, NULL
, NULL
},
4570 { gen_helper_sve_ld1hh_r
, gen_helper_sve_ld2hh_r
,
4571 gen_helper_sve_ld3hh_r
, gen_helper_sve_ld4hh_r
},
4572 { gen_helper_sve_ld1hsu_r
, NULL
, NULL
, NULL
},
4573 { gen_helper_sve_ld1hdu_r
, NULL
, NULL
, NULL
},
4575 { gen_helper_sve_ld1hds_r
, NULL
, NULL
, NULL
},
4576 { gen_helper_sve_ld1hss_r
, NULL
, NULL
, NULL
},
4577 { gen_helper_sve_ld1ss_r
, gen_helper_sve_ld2ss_r
,
4578 gen_helper_sve_ld3ss_r
, gen_helper_sve_ld4ss_r
},
4579 { gen_helper_sve_ld1sdu_r
, NULL
, NULL
, NULL
},
4581 { gen_helper_sve_ld1bds_r
, NULL
, NULL
, NULL
},
4582 { gen_helper_sve_ld1bss_r
, NULL
, NULL
, NULL
},
4583 { gen_helper_sve_ld1bhs_r
, NULL
, NULL
, NULL
},
4584 { gen_helper_sve_ld1dd_r
, gen_helper_sve_ld2dd_r
,
4585 gen_helper_sve_ld3dd_r
, gen_helper_sve_ld4dd_r
},
4587 gen_helper_gvec_mem
*fn
= fns
[dtype
][nreg
];
4589 /* While there are holes in the table, they are not
4590 * accessible via the instruction encoding.
4593 do_mem_zpa(s
, zt
, pg
, addr
, fn
);
4596 static bool trans_LD_zprr(DisasContext
*s
, arg_rprr_load
*a
, uint32_t insn
)
4601 if (sve_access_check(s
)) {
4602 TCGv_i64 addr
= new_tmp_a64(s
);
4603 tcg_gen_muli_i64(addr
, cpu_reg(s
, a
->rm
),
4604 (a
->nreg
+ 1) << dtype_msz(a
->dtype
));
4605 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4606 do_ld_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, a
->nreg
);
4611 static bool trans_LD_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4613 if (sve_access_check(s
)) {
4614 int vsz
= vec_full_reg_size(s
);
4615 int elements
= vsz
>> dtype_esz
[a
->dtype
];
4616 TCGv_i64 addr
= new_tmp_a64(s
);
4618 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
),
4619 (a
->imm
* elements
* (a
->nreg
+ 1))
4620 << dtype_msz(a
->dtype
));
4621 do_ld_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, a
->nreg
);
4626 static bool trans_LDFF1_zprr(DisasContext
*s
, arg_rprr_load
*a
, uint32_t insn
)
4628 static gen_helper_gvec_mem
* const fns
[16] = {
4629 gen_helper_sve_ldff1bb_r
,
4630 gen_helper_sve_ldff1bhu_r
,
4631 gen_helper_sve_ldff1bsu_r
,
4632 gen_helper_sve_ldff1bdu_r
,
4634 gen_helper_sve_ldff1sds_r
,
4635 gen_helper_sve_ldff1hh_r
,
4636 gen_helper_sve_ldff1hsu_r
,
4637 gen_helper_sve_ldff1hdu_r
,
4639 gen_helper_sve_ldff1hds_r
,
4640 gen_helper_sve_ldff1hss_r
,
4641 gen_helper_sve_ldff1ss_r
,
4642 gen_helper_sve_ldff1sdu_r
,
4644 gen_helper_sve_ldff1bds_r
,
4645 gen_helper_sve_ldff1bss_r
,
4646 gen_helper_sve_ldff1bhs_r
,
4647 gen_helper_sve_ldff1dd_r
,
4650 if (sve_access_check(s
)) {
4651 TCGv_i64 addr
= new_tmp_a64(s
);
4652 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), dtype_msz(a
->dtype
));
4653 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4654 do_mem_zpa(s
, a
->rd
, a
->pg
, addr
, fns
[a
->dtype
]);
4659 static bool trans_LDNF1_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4661 static gen_helper_gvec_mem
* const fns
[16] = {
4662 gen_helper_sve_ldnf1bb_r
,
4663 gen_helper_sve_ldnf1bhu_r
,
4664 gen_helper_sve_ldnf1bsu_r
,
4665 gen_helper_sve_ldnf1bdu_r
,
4667 gen_helper_sve_ldnf1sds_r
,
4668 gen_helper_sve_ldnf1hh_r
,
4669 gen_helper_sve_ldnf1hsu_r
,
4670 gen_helper_sve_ldnf1hdu_r
,
4672 gen_helper_sve_ldnf1hds_r
,
4673 gen_helper_sve_ldnf1hss_r
,
4674 gen_helper_sve_ldnf1ss_r
,
4675 gen_helper_sve_ldnf1sdu_r
,
4677 gen_helper_sve_ldnf1bds_r
,
4678 gen_helper_sve_ldnf1bss_r
,
4679 gen_helper_sve_ldnf1bhs_r
,
4680 gen_helper_sve_ldnf1dd_r
,
4683 if (sve_access_check(s
)) {
4684 int vsz
= vec_full_reg_size(s
);
4685 int elements
= vsz
>> dtype_esz
[a
->dtype
];
4686 int off
= (a
->imm
* elements
) << dtype_msz(a
->dtype
);
4687 TCGv_i64 addr
= new_tmp_a64(s
);
4689 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
), off
);
4690 do_mem_zpa(s
, a
->rd
, a
->pg
, addr
, fns
[a
->dtype
]);
4695 static void do_ldrq(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
, int msz
)
4697 static gen_helper_gvec_mem
* const fns
[4] = {
4698 gen_helper_sve_ld1bb_r
, gen_helper_sve_ld1hh_r
,
4699 gen_helper_sve_ld1ss_r
, gen_helper_sve_ld1dd_r
,
4701 unsigned vsz
= vec_full_reg_size(s
);
4705 /* Load the first quadword using the normal predicated load helpers. */
4706 desc
= tcg_const_i32(simd_desc(16, 16, zt
));
4707 t_pg
= tcg_temp_new_ptr();
4709 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4710 fns
[msz
](cpu_env
, t_pg
, addr
, desc
);
4712 tcg_temp_free_ptr(t_pg
);
4713 tcg_temp_free_i32(desc
);
4715 /* Replicate that first quadword. */
4717 unsigned dofs
= vec_full_reg_offset(s
, zt
);
4718 tcg_gen_gvec_dup_mem(4, dofs
+ 16, dofs
, vsz
- 16, vsz
- 16);
4722 static bool trans_LD1RQ_zprr(DisasContext
*s
, arg_rprr_load
*a
, uint32_t insn
)
4727 if (sve_access_check(s
)) {
4728 int msz
= dtype_msz(a
->dtype
);
4729 TCGv_i64 addr
= new_tmp_a64(s
);
4730 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), msz
);
4731 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4732 do_ldrq(s
, a
->rd
, a
->pg
, addr
, msz
);
4737 static bool trans_LD1RQ_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4739 if (sve_access_check(s
)) {
4740 TCGv_i64 addr
= new_tmp_a64(s
);
4741 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
), a
->imm
* 16);
4742 do_ldrq(s
, a
->rd
, a
->pg
, addr
, dtype_msz(a
->dtype
));
4747 /* Load and broadcast element. */
4748 static bool trans_LD1R_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4750 if (!sve_access_check(s
)) {
4754 unsigned vsz
= vec_full_reg_size(s
);
4755 unsigned psz
= pred_full_reg_size(s
);
4756 unsigned esz
= dtype_esz
[a
->dtype
];
4757 TCGLabel
*over
= gen_new_label();
4760 /* If the guarding predicate has no bits set, no load occurs. */
4762 /* Reduce the pred_esz_masks value simply to reduce the
4763 * size of the code generated here.
4765 uint64_t psz_mask
= MAKE_64BIT_MASK(0, psz
* 8);
4766 temp
= tcg_temp_new_i64();
4767 tcg_gen_ld_i64(temp
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
4768 tcg_gen_andi_i64(temp
, temp
, pred_esz_masks
[esz
] & psz_mask
);
4769 tcg_gen_brcondi_i64(TCG_COND_EQ
, temp
, 0, over
);
4770 tcg_temp_free_i64(temp
);
4772 TCGv_i32 t32
= tcg_temp_new_i32();
4773 find_last_active(s
, t32
, esz
, a
->pg
);
4774 tcg_gen_brcondi_i32(TCG_COND_LT
, t32
, 0, over
);
4775 tcg_temp_free_i32(t32
);
4778 /* Load the data. */
4779 temp
= tcg_temp_new_i64();
4780 tcg_gen_addi_i64(temp
, cpu_reg_sp(s
, a
->rn
), a
->imm
<< esz
);
4781 tcg_gen_qemu_ld_i64(temp
, temp
, get_mem_index(s
),
4782 s
->be_data
| dtype_mop
[a
->dtype
]);
4784 /* Broadcast to *all* elements. */
4785 tcg_gen_gvec_dup_i64(esz
, vec_full_reg_offset(s
, a
->rd
),
4787 tcg_temp_free_i64(temp
);
4789 /* Zero the inactive elements. */
4790 gen_set_label(over
);
4791 do_movz_zpz(s
, a
->rd
, a
->rd
, a
->pg
, esz
);
4795 static void do_st_zpa(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
,
4796 int msz
, int esz
, int nreg
)
4798 static gen_helper_gvec_mem
* const fn_single
[4][4] = {
4799 { gen_helper_sve_st1bb_r
, gen_helper_sve_st1bh_r
,
4800 gen_helper_sve_st1bs_r
, gen_helper_sve_st1bd_r
},
4801 { NULL
, gen_helper_sve_st1hh_r
,
4802 gen_helper_sve_st1hs_r
, gen_helper_sve_st1hd_r
},
4804 gen_helper_sve_st1ss_r
, gen_helper_sve_st1sd_r
},
4805 { NULL
, NULL
, NULL
, gen_helper_sve_st1dd_r
},
4807 static gen_helper_gvec_mem
* const fn_multiple
[3][4] = {
4808 { gen_helper_sve_st2bb_r
, gen_helper_sve_st2hh_r
,
4809 gen_helper_sve_st2ss_r
, gen_helper_sve_st2dd_r
},
4810 { gen_helper_sve_st3bb_r
, gen_helper_sve_st3hh_r
,
4811 gen_helper_sve_st3ss_r
, gen_helper_sve_st3dd_r
},
4812 { gen_helper_sve_st4bb_r
, gen_helper_sve_st4hh_r
,
4813 gen_helper_sve_st4ss_r
, gen_helper_sve_st4dd_r
},
4815 gen_helper_gvec_mem
*fn
;
4819 fn
= fn_single
[msz
][esz
];
4821 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4823 fn
= fn_multiple
[nreg
- 1][msz
];
4826 do_mem_zpa(s
, zt
, pg
, addr
, fn
);
4829 static bool trans_ST_zprr(DisasContext
*s
, arg_rprr_store
*a
, uint32_t insn
)
4831 if (a
->rm
== 31 || a
->msz
> a
->esz
) {
4834 if (sve_access_check(s
)) {
4835 TCGv_i64 addr
= new_tmp_a64(s
);
4836 tcg_gen_muli_i64(addr
, cpu_reg(s
, a
->rm
), (a
->nreg
+ 1) << a
->msz
);
4837 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4838 do_st_zpa(s
, a
->rd
, a
->pg
, addr
, a
->msz
, a
->esz
, a
->nreg
);
4843 static bool trans_ST_zpri(DisasContext
*s
, arg_rpri_store
*a
, uint32_t insn
)
4845 if (a
->msz
> a
->esz
) {
4848 if (sve_access_check(s
)) {
4849 int vsz
= vec_full_reg_size(s
);
4850 int elements
= vsz
>> a
->esz
;
4851 TCGv_i64 addr
= new_tmp_a64(s
);
4853 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
),
4854 (a
->imm
* elements
* (a
->nreg
+ 1)) << a
->msz
);
4855 do_st_zpa(s
, a
->rd
, a
->pg
, addr
, a
->msz
, a
->esz
, a
->nreg
);
4861 *** SVE gather loads / scatter stores
4864 static void do_mem_zpz(DisasContext
*s
, int zt
, int pg
, int zm
, int scale
,
4865 TCGv_i64 scalar
, gen_helper_gvec_mem_scatter
*fn
)
4867 unsigned vsz
= vec_full_reg_size(s
);
4868 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, scale
));
4869 TCGv_ptr t_zm
= tcg_temp_new_ptr();
4870 TCGv_ptr t_pg
= tcg_temp_new_ptr();
4871 TCGv_ptr t_zt
= tcg_temp_new_ptr();
4873 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4874 tcg_gen_addi_ptr(t_zm
, cpu_env
, vec_full_reg_offset(s
, zm
));
4875 tcg_gen_addi_ptr(t_zt
, cpu_env
, vec_full_reg_offset(s
, zt
));
4876 fn(cpu_env
, t_zt
, t_pg
, t_zm
, scalar
, desc
);
4878 tcg_temp_free_ptr(t_zt
);
4879 tcg_temp_free_ptr(t_zm
);
4880 tcg_temp_free_ptr(t_pg
);
4881 tcg_temp_free_i32(desc
);
4884 /* Indexed by [ff][xs][u][msz]. */
4885 static gen_helper_gvec_mem_scatter
* const gather_load_fn32
[2][2][2][3] = {
4886 { { { gen_helper_sve_ldbss_zsu
,
4887 gen_helper_sve_ldhss_zsu
,
4889 { gen_helper_sve_ldbsu_zsu
,
4890 gen_helper_sve_ldhsu_zsu
,
4891 gen_helper_sve_ldssu_zsu
, } },
4892 { { gen_helper_sve_ldbss_zss
,
4893 gen_helper_sve_ldhss_zss
,
4895 { gen_helper_sve_ldbsu_zss
,
4896 gen_helper_sve_ldhsu_zss
,
4897 gen_helper_sve_ldssu_zss
, } } },
4899 { { { gen_helper_sve_ldffbss_zsu
,
4900 gen_helper_sve_ldffhss_zsu
,
4902 { gen_helper_sve_ldffbsu_zsu
,
4903 gen_helper_sve_ldffhsu_zsu
,
4904 gen_helper_sve_ldffssu_zsu
, } },
4905 { { gen_helper_sve_ldffbss_zss
,
4906 gen_helper_sve_ldffhss_zss
,
4908 { gen_helper_sve_ldffbsu_zss
,
4909 gen_helper_sve_ldffhsu_zss
,
4910 gen_helper_sve_ldffssu_zss
, } } }
4913 /* Note that we overload xs=2 to indicate 64-bit offset. */
4914 static gen_helper_gvec_mem_scatter
* const gather_load_fn64
[2][3][2][4] = {
4915 { { { gen_helper_sve_ldbds_zsu
,
4916 gen_helper_sve_ldhds_zsu
,
4917 gen_helper_sve_ldsds_zsu
,
4919 { gen_helper_sve_ldbdu_zsu
,
4920 gen_helper_sve_ldhdu_zsu
,
4921 gen_helper_sve_ldsdu_zsu
,
4922 gen_helper_sve_ldddu_zsu
, } },
4923 { { gen_helper_sve_ldbds_zss
,
4924 gen_helper_sve_ldhds_zss
,
4925 gen_helper_sve_ldsds_zss
,
4927 { gen_helper_sve_ldbdu_zss
,
4928 gen_helper_sve_ldhdu_zss
,
4929 gen_helper_sve_ldsdu_zss
,
4930 gen_helper_sve_ldddu_zss
, } },
4931 { { gen_helper_sve_ldbds_zd
,
4932 gen_helper_sve_ldhds_zd
,
4933 gen_helper_sve_ldsds_zd
,
4935 { gen_helper_sve_ldbdu_zd
,
4936 gen_helper_sve_ldhdu_zd
,
4937 gen_helper_sve_ldsdu_zd
,
4938 gen_helper_sve_ldddu_zd
, } } },
4940 { { { gen_helper_sve_ldffbds_zsu
,
4941 gen_helper_sve_ldffhds_zsu
,
4942 gen_helper_sve_ldffsds_zsu
,
4944 { gen_helper_sve_ldffbdu_zsu
,
4945 gen_helper_sve_ldffhdu_zsu
,
4946 gen_helper_sve_ldffsdu_zsu
,
4947 gen_helper_sve_ldffddu_zsu
, } },
4948 { { gen_helper_sve_ldffbds_zss
,
4949 gen_helper_sve_ldffhds_zss
,
4950 gen_helper_sve_ldffsds_zss
,
4952 { gen_helper_sve_ldffbdu_zss
,
4953 gen_helper_sve_ldffhdu_zss
,
4954 gen_helper_sve_ldffsdu_zss
,
4955 gen_helper_sve_ldffddu_zss
, } },
4956 { { gen_helper_sve_ldffbds_zd
,
4957 gen_helper_sve_ldffhds_zd
,
4958 gen_helper_sve_ldffsds_zd
,
4960 { gen_helper_sve_ldffbdu_zd
,
4961 gen_helper_sve_ldffhdu_zd
,
4962 gen_helper_sve_ldffsdu_zd
,
4963 gen_helper_sve_ldffddu_zd
, } } }
4966 static bool trans_LD1_zprz(DisasContext
*s
, arg_LD1_zprz
*a
, uint32_t insn
)
4968 gen_helper_gvec_mem_scatter
*fn
= NULL
;
4970 if (!sve_access_check(s
)) {
4976 fn
= gather_load_fn32
[a
->ff
][a
->xs
][a
->u
][a
->msz
];
4979 fn
= gather_load_fn64
[a
->ff
][a
->xs
][a
->u
][a
->msz
];
4984 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rm
, a
->scale
* a
->msz
,
4985 cpu_reg_sp(s
, a
->rn
), fn
);
4989 static bool trans_LD1_zpiz(DisasContext
*s
, arg_LD1_zpiz
*a
, uint32_t insn
)
4991 gen_helper_gvec_mem_scatter
*fn
= NULL
;
4994 if (a
->esz
< a
->msz
|| (a
->esz
== a
->msz
&& !a
->u
)) {
4997 if (!sve_access_check(s
)) {
5003 fn
= gather_load_fn32
[a
->ff
][0][a
->u
][a
->msz
];
5006 fn
= gather_load_fn64
[a
->ff
][2][a
->u
][a
->msz
];
5011 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5012 * by loading the immediate into the scalar parameter.
5014 imm
= tcg_const_i64(a
->imm
<< a
->msz
);
5015 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rn
, 0, imm
, fn
);
5016 tcg_temp_free_i64(imm
);
5020 /* Indexed by [xs][msz]. */
5021 static gen_helper_gvec_mem_scatter
* const scatter_store_fn32
[2][3] = {
5022 { gen_helper_sve_stbs_zsu
,
5023 gen_helper_sve_sths_zsu
,
5024 gen_helper_sve_stss_zsu
, },
5025 { gen_helper_sve_stbs_zss
,
5026 gen_helper_sve_sths_zss
,
5027 gen_helper_sve_stss_zss
, },
5030 /* Note that we overload xs=2 to indicate 64-bit offset. */
5031 static gen_helper_gvec_mem_scatter
* const scatter_store_fn64
[3][4] = {
5032 { gen_helper_sve_stbd_zsu
,
5033 gen_helper_sve_sthd_zsu
,
5034 gen_helper_sve_stsd_zsu
,
5035 gen_helper_sve_stdd_zsu
, },
5036 { gen_helper_sve_stbd_zss
,
5037 gen_helper_sve_sthd_zss
,
5038 gen_helper_sve_stsd_zss
,
5039 gen_helper_sve_stdd_zss
, },
5040 { gen_helper_sve_stbd_zd
,
5041 gen_helper_sve_sthd_zd
,
5042 gen_helper_sve_stsd_zd
,
5043 gen_helper_sve_stdd_zd
, },
5046 static bool trans_ST1_zprz(DisasContext
*s
, arg_ST1_zprz
*a
, uint32_t insn
)
5048 gen_helper_gvec_mem_scatter
*fn
;
5050 if (a
->esz
< a
->msz
|| (a
->msz
== 0 && a
->scale
)) {
5053 if (!sve_access_check(s
)) {
5058 fn
= scatter_store_fn32
[a
->xs
][a
->msz
];
5061 fn
= scatter_store_fn64
[a
->xs
][a
->msz
];
5064 g_assert_not_reached();
5066 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rm
, a
->scale
* a
->msz
,
5067 cpu_reg_sp(s
, a
->rn
), fn
);
5071 static bool trans_ST1_zpiz(DisasContext
*s
, arg_ST1_zpiz
*a
, uint32_t insn
)
5073 gen_helper_gvec_mem_scatter
*fn
= NULL
;
5076 if (a
->esz
< a
->msz
) {
5079 if (!sve_access_check(s
)) {
5085 fn
= scatter_store_fn32
[0][a
->msz
];
5088 fn
= scatter_store_fn64
[2][a
->msz
];
5093 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5094 * by loading the immediate into the scalar parameter.
5096 imm
= tcg_const_i64(a
->imm
<< a
->msz
);
5097 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rn
, 0, imm
, fn
);
5098 tcg_temp_free_i64(imm
);
5106 static bool trans_PRF(DisasContext
*s
, arg_PRF
*a
, uint32_t insn
)
5108 /* Prefetch is a nop within QEMU. */
5109 sve_access_check(s
);
5113 static bool trans_PRF_rr(DisasContext
*s
, arg_PRF_rr
*a
, uint32_t insn
)
5118 /* Prefetch is a nop within QEMU. */
5119 sve_access_check(s
);
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */
5137 static bool trans_MOVPRFX(DisasContext
*s
, arg_MOVPRFX
*a
, uint32_t insn
)
5139 return do_mov_z(s
, a
->rd
, a
->rn
);
5142 static bool trans_MOVPRFX_m(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
5144 if (sve_access_check(s
)) {
5145 do_sel_z(s
, a
->rd
, a
->rn
, a
->rd
, a
->pg
, a
->esz
);
5150 static bool trans_MOVPRFX_z(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
5152 if (sve_access_check(s
)) {
5153 do_movz_zpz(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
);