2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64
, uint32_t, uint32_t);
41 typedef void gen_helper_gvec_flags_3(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
43 typedef void gen_helper_gvec_flags_4(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
44 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
46 typedef void gen_helper_gvec_mem(TCGv_env
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env
, TCGv_ptr
, TCGv_ptr
,
48 TCGv_ptr
, TCGv_i64
, TCGv_i32
);
51 * Helpers for extracting complex instruction fields.
/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}
/* See e.g. LSR (immediate, predicated): shift count decoded from tsz:imm3. */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}
/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
/* Decode helper: a field encoded as x represents the value x + 1. */
static inline int plus1(int x)
{
    return x + 1;
}
/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(int x)
{
    /* Use a multiply rather than "<<": left-shifting a negative value
     * is undefined behavior in C (C11 6.5.7p4); the multiply is
     * well-defined and produces the same result for all inputs.
     */
    return (int8_t)x * (x & 0x100 ? 256 : 1);
}
/* As expand_imm_sh8s, but with an unsigned 8-bit payload: bit 8 selects
 * whether the low byte is used as-is or shifted up by 8.
 */
static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
100 * Include the generated decoder.
103 #include "decode-sve.inc.c"
106 * Implement all of the translator functions referenced by the decoder.
109 /* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
112 static inline int pred_full_reg_offset(DisasContext
*s
, int regno
)
114 return offsetof(CPUARMState
, vfp
.pregs
[regno
]);
117 /* Return the byte size of the whole predicate register, VL / 64. */
118 static inline int pred_full_reg_size(DisasContext
*s
)
120 return s
->sve_len
>> 3;
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}
140 static int pred_gvec_reg_size(DisasContext
*s
)
142 return size_for_gvec(pred_full_reg_size(s
));
145 /* Invoke a vector expander on two Zregs. */
146 static bool do_vector2_z(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
147 int esz
, int rd
, int rn
)
149 if (sve_access_check(s
)) {
150 unsigned vsz
= vec_full_reg_size(s
);
151 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
152 vec_full_reg_offset(s
, rn
), vsz
, vsz
);
157 /* Invoke a vector expander on three Zregs. */
158 static bool do_vector3_z(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
159 int esz
, int rd
, int rn
, int rm
)
161 if (sve_access_check(s
)) {
162 unsigned vsz
= vec_full_reg_size(s
);
163 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
164 vec_full_reg_offset(s
, rn
),
165 vec_full_reg_offset(s
, rm
), vsz
, vsz
);
170 /* Invoke a vector move on two Zregs. */
171 static bool do_mov_z(DisasContext
*s
, int rd
, int rn
)
173 return do_vector2_z(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
176 /* Initialize a Zreg with replications of a 64-bit immediate. */
177 static void do_dupi_z(DisasContext
*s
, int rd
, uint64_t word
)
179 unsigned vsz
= vec_full_reg_size(s
);
180 tcg_gen_gvec_dup64i(vec_full_reg_offset(s
, rd
), vsz
, vsz
, word
);
183 /* Invoke a vector expander on two Pregs. */
184 static bool do_vector2_p(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
185 int esz
, int rd
, int rn
)
187 if (sve_access_check(s
)) {
188 unsigned psz
= pred_gvec_reg_size(s
);
189 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
190 pred_full_reg_offset(s
, rn
), psz
, psz
);
195 /* Invoke a vector expander on three Pregs. */
196 static bool do_vector3_p(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
197 int esz
, int rd
, int rn
, int rm
)
199 if (sve_access_check(s
)) {
200 unsigned psz
= pred_gvec_reg_size(s
);
201 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
202 pred_full_reg_offset(s
, rn
),
203 pred_full_reg_offset(s
, rm
), psz
, psz
);
208 /* Invoke a vector operation on four Pregs. */
209 static bool do_vecop4_p(DisasContext
*s
, const GVecGen4
*gvec_op
,
210 int rd
, int rn
, int rm
, int rg
)
212 if (sve_access_check(s
)) {
213 unsigned psz
= pred_gvec_reg_size(s
);
214 tcg_gen_gvec_4(pred_full_reg_offset(s
, rd
),
215 pred_full_reg_offset(s
, rn
),
216 pred_full_reg_offset(s
, rm
),
217 pred_full_reg_offset(s
, rg
),
223 /* Invoke a vector move on two Pregs. */
224 static bool do_mov_p(DisasContext
*s
, int rd
, int rn
)
226 return do_vector2_p(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
229 /* Set the cpu flags as per a return from an SVE helper. */
230 static void do_pred_flags(TCGv_i32 t
)
232 tcg_gen_mov_i32(cpu_NF
, t
);
233 tcg_gen_andi_i32(cpu_ZF
, t
, 2);
234 tcg_gen_andi_i32(cpu_CF
, t
, 1);
235 tcg_gen_movi_i32(cpu_VF
, 0);
238 /* Subroutines computing the ARM PredTest psuedofunction. */
239 static void do_predtest1(TCGv_i64 d
, TCGv_i64 g
)
241 TCGv_i32 t
= tcg_temp_new_i32();
243 gen_helper_sve_predtest1(t
, d
, g
);
245 tcg_temp_free_i32(t
);
248 static void do_predtest(DisasContext
*s
, int dofs
, int gofs
, int words
)
250 TCGv_ptr dptr
= tcg_temp_new_ptr();
251 TCGv_ptr gptr
= tcg_temp_new_ptr();
254 tcg_gen_addi_ptr(dptr
, cpu_env
, dofs
);
255 tcg_gen_addi_ptr(gptr
, cpu_env
, gofs
);
256 t
= tcg_const_i32(words
);
258 gen_helper_sve_predtest(t
, dptr
, gptr
, t
);
259 tcg_temp_free_ptr(dptr
);
260 tcg_temp_free_ptr(gptr
);
263 tcg_temp_free_i32(t
);
266 /* For each element size, the bits within a predicate word that are active. */
267 const uint64_t pred_esz_masks
[4] = {
268 0xffffffffffffffffull
, 0x5555555555555555ull
,
269 0x1111111111111111ull
, 0x0101010101010101ull
273 *** SVE Logical - Unpredicated Group
276 static bool trans_AND_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
278 return do_vector3_z(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
281 static bool trans_ORR_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
283 if (a
->rn
== a
->rm
) { /* MOV */
284 return do_mov_z(s
, a
->rd
, a
->rn
);
286 return do_vector3_z(s
, tcg_gen_gvec_or
, 0, a
->rd
, a
->rn
, a
->rm
);
290 static bool trans_EOR_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
292 return do_vector3_z(s
, tcg_gen_gvec_xor
, 0, a
->rd
, a
->rn
, a
->rm
);
295 static bool trans_BIC_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
297 return do_vector3_z(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
301 *** SVE Integer Arithmetic - Unpredicated Group
304 static bool trans_ADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
306 return do_vector3_z(s
, tcg_gen_gvec_add
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
309 static bool trans_SUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
311 return do_vector3_z(s
, tcg_gen_gvec_sub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
314 static bool trans_SQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
316 return do_vector3_z(s
, tcg_gen_gvec_ssadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
319 static bool trans_SQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
321 return do_vector3_z(s
, tcg_gen_gvec_sssub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
324 static bool trans_UQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
326 return do_vector3_z(s
, tcg_gen_gvec_usadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
329 static bool trans_UQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
)
331 return do_vector3_z(s
, tcg_gen_gvec_ussub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
335 *** SVE Integer Arithmetic - Binary Predicated Group
338 static bool do_zpzz_ool(DisasContext
*s
, arg_rprr_esz
*a
, gen_helper_gvec_4
*fn
)
340 unsigned vsz
= vec_full_reg_size(s
);
344 if (sve_access_check(s
)) {
345 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
346 vec_full_reg_offset(s
, a
->rn
),
347 vec_full_reg_offset(s
, a
->rm
),
348 pred_full_reg_offset(s
, a
->pg
),
354 /* Select active elememnts from Zn and inactive elements from Zm,
355 * storing the result in Zd.
357 static void do_sel_z(DisasContext
*s
, int rd
, int rn
, int rm
, int pg
, int esz
)
359 static gen_helper_gvec_4
* const fns
[4] = {
360 gen_helper_sve_sel_zpzz_b
, gen_helper_sve_sel_zpzz_h
,
361 gen_helper_sve_sel_zpzz_s
, gen_helper_sve_sel_zpzz_d
363 unsigned vsz
= vec_full_reg_size(s
);
364 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, rd
),
365 vec_full_reg_offset(s
, rn
),
366 vec_full_reg_offset(s
, rm
),
367 pred_full_reg_offset(s
, pg
),
368 vsz
, vsz
, 0, fns
[esz
]);
371 #define DO_ZPZZ(NAME, name) \
372 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
374 static gen_helper_gvec_4 * const fns[4] = { \
375 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
376 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
378 return do_zpzz_ool(s, a, fns[a->esz]); \
397 DO_ZPZZ(SMULH
, smulh
)
398 DO_ZPZZ(UMULH
, umulh
)
404 static bool trans_SDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
)
406 static gen_helper_gvec_4
* const fns
[4] = {
407 NULL
, NULL
, gen_helper_sve_sdiv_zpzz_s
, gen_helper_sve_sdiv_zpzz_d
409 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
412 static bool trans_UDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
)
414 static gen_helper_gvec_4
* const fns
[4] = {
415 NULL
, NULL
, gen_helper_sve_udiv_zpzz_s
, gen_helper_sve_udiv_zpzz_d
417 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
420 static bool trans_SEL_zpzz(DisasContext
*s
, arg_rprr_esz
*a
)
422 if (sve_access_check(s
)) {
423 do_sel_z(s
, a
->rd
, a
->rn
, a
->rm
, a
->pg
, a
->esz
);
431 *** SVE Integer Arithmetic - Unary Predicated Group
434 static bool do_zpz_ool(DisasContext
*s
, arg_rpr_esz
*a
, gen_helper_gvec_3
*fn
)
439 if (sve_access_check(s
)) {
440 unsigned vsz
= vec_full_reg_size(s
);
441 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
442 vec_full_reg_offset(s
, a
->rn
),
443 pred_full_reg_offset(s
, a
->pg
),
/* Expand a unary predicated trans function from the per-esz helpers. */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}
461 DO_ZPZ(CNT_zpz
, cnt_zpz
)
463 DO_ZPZ(NOT_zpz
, not_zpz
)
467 static bool trans_FABS(DisasContext
*s
, arg_rpr_esz
*a
)
469 static gen_helper_gvec_3
* const fns
[4] = {
471 gen_helper_sve_fabs_h
,
472 gen_helper_sve_fabs_s
,
473 gen_helper_sve_fabs_d
475 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
478 static bool trans_FNEG(DisasContext
*s
, arg_rpr_esz
*a
)
480 static gen_helper_gvec_3
* const fns
[4] = {
482 gen_helper_sve_fneg_h
,
483 gen_helper_sve_fneg_s
,
484 gen_helper_sve_fneg_d
486 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
489 static bool trans_SXTB(DisasContext
*s
, arg_rpr_esz
*a
)
491 static gen_helper_gvec_3
* const fns
[4] = {
493 gen_helper_sve_sxtb_h
,
494 gen_helper_sve_sxtb_s
,
495 gen_helper_sve_sxtb_d
497 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
500 static bool trans_UXTB(DisasContext
*s
, arg_rpr_esz
*a
)
502 static gen_helper_gvec_3
* const fns
[4] = {
504 gen_helper_sve_uxtb_h
,
505 gen_helper_sve_uxtb_s
,
506 gen_helper_sve_uxtb_d
508 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
511 static bool trans_SXTH(DisasContext
*s
, arg_rpr_esz
*a
)
513 static gen_helper_gvec_3
* const fns
[4] = {
515 gen_helper_sve_sxth_s
,
516 gen_helper_sve_sxth_d
518 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
521 static bool trans_UXTH(DisasContext
*s
, arg_rpr_esz
*a
)
523 static gen_helper_gvec_3
* const fns
[4] = {
525 gen_helper_sve_uxth_s
,
526 gen_helper_sve_uxth_d
528 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
531 static bool trans_SXTW(DisasContext
*s
, arg_rpr_esz
*a
)
533 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_sxtw_d
: NULL
);
536 static bool trans_UXTW(DisasContext
*s
, arg_rpr_esz
*a
)
538 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_uxtw_d
: NULL
);
544 *** SVE Integer Reduction Group
547 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
548 static bool do_vpz_ool(DisasContext
*s
, arg_rpr_esz
*a
,
549 gen_helper_gvec_reduc
*fn
)
551 unsigned vsz
= vec_full_reg_size(s
);
559 if (!sve_access_check(s
)) {
563 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
564 temp
= tcg_temp_new_i64();
565 t_zn
= tcg_temp_new_ptr();
566 t_pg
= tcg_temp_new_ptr();
568 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
569 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
570 fn(temp
, t_zn
, t_pg
, desc
);
571 tcg_temp_free_ptr(t_zn
);
572 tcg_temp_free_ptr(t_pg
);
573 tcg_temp_free_i32(desc
);
575 write_fp_dreg(s
, a
->rd
, temp
);
576 tcg_temp_free_i64(temp
);
/* Expand a predicated reduction trans function from the per-esz helpers. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}
600 static bool trans_SADDV(DisasContext
*s
, arg_rpr_esz
*a
)
602 static gen_helper_gvec_reduc
* const fns
[4] = {
603 gen_helper_sve_saddv_b
, gen_helper_sve_saddv_h
,
604 gen_helper_sve_saddv_s
, NULL
606 return do_vpz_ool(s
, a
, fns
[a
->esz
]);
612 *** SVE Shift by Immediate - Predicated Group
615 /* Store zero into every active element of Zd. We will use this for two
616 * and three-operand predicated instructions for which logic dictates a
619 static bool do_clr_zp(DisasContext
*s
, int rd
, int pg
, int esz
)
621 static gen_helper_gvec_2
* const fns
[4] = {
622 gen_helper_sve_clr_b
, gen_helper_sve_clr_h
,
623 gen_helper_sve_clr_s
, gen_helper_sve_clr_d
,
625 if (sve_access_check(s
)) {
626 unsigned vsz
= vec_full_reg_size(s
);
627 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
628 pred_full_reg_offset(s
, pg
),
629 vsz
, vsz
, 0, fns
[esz
]);
634 /* Copy Zn into Zd, storing zeros into inactive elements. */
635 static void do_movz_zpz(DisasContext
*s
, int rd
, int rn
, int pg
, int esz
)
637 static gen_helper_gvec_3
* const fns
[4] = {
638 gen_helper_sve_movz_b
, gen_helper_sve_movz_h
,
639 gen_helper_sve_movz_s
, gen_helper_sve_movz_d
,
641 unsigned vsz
= vec_full_reg_size(s
);
642 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, rd
),
643 vec_full_reg_offset(s
, rn
),
644 pred_full_reg_offset(s
, pg
),
645 vsz
, vsz
, 0, fns
[esz
]);
648 static bool do_zpzi_ool(DisasContext
*s
, arg_rpri_esz
*a
,
649 gen_helper_gvec_3
*fn
)
651 if (sve_access_check(s
)) {
652 unsigned vsz
= vec_full_reg_size(s
);
653 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
654 vec_full_reg_offset(s
, a
->rn
),
655 pred_full_reg_offset(s
, a
->pg
),
656 vsz
, vsz
, a
->imm
, fn
);
661 static bool trans_ASR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
663 static gen_helper_gvec_3
* const fns
[4] = {
664 gen_helper_sve_asr_zpzi_b
, gen_helper_sve_asr_zpzi_h
,
665 gen_helper_sve_asr_zpzi_s
, gen_helper_sve_asr_zpzi_d
,
668 /* Invalid tsz encoding -- see tszimm_esz. */
671 /* Shift by element size is architecturally valid. For
672 arithmetic right-shift, it's the same as by one less. */
673 a
->imm
= MIN(a
->imm
, (8 << a
->esz
) - 1);
674 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
677 static bool trans_LSR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
679 static gen_helper_gvec_3
* const fns
[4] = {
680 gen_helper_sve_lsr_zpzi_b
, gen_helper_sve_lsr_zpzi_h
,
681 gen_helper_sve_lsr_zpzi_s
, gen_helper_sve_lsr_zpzi_d
,
686 /* Shift by element size is architecturally valid.
687 For logical shifts, it is a zeroing operation. */
688 if (a
->imm
>= (8 << a
->esz
)) {
689 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
691 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
695 static bool trans_LSL_zpzi(DisasContext
*s
, arg_rpri_esz
*a
)
697 static gen_helper_gvec_3
* const fns
[4] = {
698 gen_helper_sve_lsl_zpzi_b
, gen_helper_sve_lsl_zpzi_h
,
699 gen_helper_sve_lsl_zpzi_s
, gen_helper_sve_lsl_zpzi_d
,
704 /* Shift by element size is architecturally valid.
705 For logical shifts, it is a zeroing operation. */
706 if (a
->imm
>= (8 << a
->esz
)) {
707 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
709 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
713 static bool trans_ASRD(DisasContext
*s
, arg_rpri_esz
*a
)
715 static gen_helper_gvec_3
* const fns
[4] = {
716 gen_helper_sve_asrd_b
, gen_helper_sve_asrd_h
,
717 gen_helper_sve_asrd_s
, gen_helper_sve_asrd_d
,
722 /* Shift by element size is architecturally valid. For arithmetic
723 right shift for division, it is a zeroing operation. */
724 if (a
->imm
>= (8 << a
->esz
)) {
725 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
727 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
732 *** SVE Bitwise Shift - Predicated Group
/* Shift by wide (64-bit) elements; only b/h/s element sizes exist. */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)          \
{                                                                          \
    static gen_helper_gvec_4 * const fns[3] = {                            \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,    \
        gen_helper_sve_##name##_zpzw_s,                                    \
    };                                                                     \
    if (a->esz < 0 || a->esz >= 3) {                                       \
        return false;                                                      \
    }                                                                      \
    return do_zpzz_ool(s, a, fns[a->esz]);                                 \
}
755 *** SVE Bitwise Shift - Unpredicated Group
758 static bool do_shift_imm(DisasContext
*s
, arg_rri_esz
*a
, bool asr
,
759 void (*gvec_fn
)(unsigned, uint32_t, uint32_t,
760 int64_t, uint32_t, uint32_t))
763 /* Invalid tsz encoding -- see tszimm_esz. */
766 if (sve_access_check(s
)) {
767 unsigned vsz
= vec_full_reg_size(s
);
768 /* Shift by element size is architecturally valid. For
769 arithmetic right-shift, it's the same as by one less.
770 Otherwise it is a zeroing operation. */
771 if (a
->imm
>= 8 << a
->esz
) {
773 a
->imm
= (8 << a
->esz
) - 1;
775 do_dupi_z(s
, a
->rd
, 0);
779 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
780 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
785 static bool trans_ASR_zzi(DisasContext
*s
, arg_rri_esz
*a
)
787 return do_shift_imm(s
, a
, true, tcg_gen_gvec_sari
);
790 static bool trans_LSR_zzi(DisasContext
*s
, arg_rri_esz
*a
)
792 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shri
);
795 static bool trans_LSL_zzi(DisasContext
*s
, arg_rri_esz
*a
)
797 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shli
);
800 static bool do_zzw_ool(DisasContext
*s
, arg_rrr_esz
*a
, gen_helper_gvec_3
*fn
)
805 if (sve_access_check(s
)) {
806 unsigned vsz
= vec_full_reg_size(s
);
807 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
808 vec_full_reg_offset(s
, a
->rn
),
809 vec_full_reg_offset(s
, a
->rm
),
815 #define DO_ZZW(NAME, name) \
816 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
818 static gen_helper_gvec_3 * const fns[4] = { \
819 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
820 gen_helper_sve_##name##_zzw_s, NULL \
822 return do_zzw_ool(s, a, fns[a->esz]); \
832 *** SVE Integer Multiply-Add Group
835 static bool do_zpzzz_ool(DisasContext
*s
, arg_rprrr_esz
*a
,
836 gen_helper_gvec_5
*fn
)
838 if (sve_access_check(s
)) {
839 unsigned vsz
= vec_full_reg_size(s
);
840 tcg_gen_gvec_5_ool(vec_full_reg_offset(s
, a
->rd
),
841 vec_full_reg_offset(s
, a
->ra
),
842 vec_full_reg_offset(s
, a
->rn
),
843 vec_full_reg_offset(s
, a
->rm
),
844 pred_full_reg_offset(s
, a
->pg
),
850 #define DO_ZPZZZ(NAME, name) \
851 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
853 static gen_helper_gvec_5 * const fns[4] = { \
854 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
855 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
857 return do_zpzzz_ool(s, a, fns[a->esz]); \
866 *** SVE Index Generation Group
869 static void do_index(DisasContext
*s
, int esz
, int rd
,
870 TCGv_i64 start
, TCGv_i64 incr
)
872 unsigned vsz
= vec_full_reg_size(s
);
873 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
874 TCGv_ptr t_zd
= tcg_temp_new_ptr();
876 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
878 gen_helper_sve_index_d(t_zd
, start
, incr
, desc
);
880 typedef void index_fn(TCGv_ptr
, TCGv_i32
, TCGv_i32
, TCGv_i32
);
881 static index_fn
* const fns
[3] = {
882 gen_helper_sve_index_b
,
883 gen_helper_sve_index_h
,
884 gen_helper_sve_index_s
,
886 TCGv_i32 s32
= tcg_temp_new_i32();
887 TCGv_i32 i32
= tcg_temp_new_i32();
889 tcg_gen_extrl_i64_i32(s32
, start
);
890 tcg_gen_extrl_i64_i32(i32
, incr
);
891 fns
[esz
](t_zd
, s32
, i32
, desc
);
893 tcg_temp_free_i32(s32
);
894 tcg_temp_free_i32(i32
);
896 tcg_temp_free_ptr(t_zd
);
897 tcg_temp_free_i32(desc
);
900 static bool trans_INDEX_ii(DisasContext
*s
, arg_INDEX_ii
*a
)
902 if (sve_access_check(s
)) {
903 TCGv_i64 start
= tcg_const_i64(a
->imm1
);
904 TCGv_i64 incr
= tcg_const_i64(a
->imm2
);
905 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
906 tcg_temp_free_i64(start
);
907 tcg_temp_free_i64(incr
);
912 static bool trans_INDEX_ir(DisasContext
*s
, arg_INDEX_ir
*a
)
914 if (sve_access_check(s
)) {
915 TCGv_i64 start
= tcg_const_i64(a
->imm
);
916 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
917 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
918 tcg_temp_free_i64(start
);
923 static bool trans_INDEX_ri(DisasContext
*s
, arg_INDEX_ri
*a
)
925 if (sve_access_check(s
)) {
926 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
927 TCGv_i64 incr
= tcg_const_i64(a
->imm
);
928 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
929 tcg_temp_free_i64(incr
);
934 static bool trans_INDEX_rr(DisasContext
*s
, arg_INDEX_rr
*a
)
936 if (sve_access_check(s
)) {
937 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
938 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
939 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
945 *** SVE Stack Allocation Group
948 static bool trans_ADDVL(DisasContext
*s
, arg_ADDVL
*a
)
950 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
951 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
952 tcg_gen_addi_i64(rd
, rn
, a
->imm
* vec_full_reg_size(s
));
956 static bool trans_ADDPL(DisasContext
*s
, arg_ADDPL
*a
)
958 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
959 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
960 tcg_gen_addi_i64(rd
, rn
, a
->imm
* pred_full_reg_size(s
));
964 static bool trans_RDVL(DisasContext
*s
, arg_RDVL
*a
)
966 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
967 tcg_gen_movi_i64(reg
, a
->imm
* vec_full_reg_size(s
));
972 *** SVE Compute Vector Address Group
975 static bool do_adr(DisasContext
*s
, arg_rrri
*a
, gen_helper_gvec_3
*fn
)
977 if (sve_access_check(s
)) {
978 unsigned vsz
= vec_full_reg_size(s
);
979 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
980 vec_full_reg_offset(s
, a
->rn
),
981 vec_full_reg_offset(s
, a
->rm
),
982 vsz
, vsz
, a
->imm
, fn
);
987 static bool trans_ADR_p32(DisasContext
*s
, arg_rrri
*a
)
989 return do_adr(s
, a
, gen_helper_sve_adr_p32
);
992 static bool trans_ADR_p64(DisasContext
*s
, arg_rrri
*a
)
994 return do_adr(s
, a
, gen_helper_sve_adr_p64
);
997 static bool trans_ADR_s32(DisasContext
*s
, arg_rrri
*a
)
999 return do_adr(s
, a
, gen_helper_sve_adr_s32
);
1002 static bool trans_ADR_u32(DisasContext
*s
, arg_rrri
*a
)
1004 return do_adr(s
, a
, gen_helper_sve_adr_u32
);
1008 *** SVE Integer Misc - Unpredicated Group
1011 static bool trans_FEXPA(DisasContext
*s
, arg_rr_esz
*a
)
1013 static gen_helper_gvec_2
* const fns
[4] = {
1015 gen_helper_sve_fexpa_h
,
1016 gen_helper_sve_fexpa_s
,
1017 gen_helper_sve_fexpa_d
,
1022 if (sve_access_check(s
)) {
1023 unsigned vsz
= vec_full_reg_size(s
);
1024 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
1025 vec_full_reg_offset(s
, a
->rn
),
1026 vsz
, vsz
, 0, fns
[a
->esz
]);
1031 static bool trans_FTSSEL(DisasContext
*s
, arg_rrr_esz
*a
)
1033 static gen_helper_gvec_3
* const fns
[4] = {
1035 gen_helper_sve_ftssel_h
,
1036 gen_helper_sve_ftssel_s
,
1037 gen_helper_sve_ftssel_d
,
1042 if (sve_access_check(s
)) {
1043 unsigned vsz
= vec_full_reg_size(s
);
1044 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
1045 vec_full_reg_offset(s
, a
->rn
),
1046 vec_full_reg_offset(s
, a
->rm
),
1047 vsz
, vsz
, 0, fns
[a
->esz
]);
1053 *** SVE Predicate Logical Operations Group
1056 static bool do_pppp_flags(DisasContext
*s
, arg_rprr_s
*a
,
1057 const GVecGen4
*gvec_op
)
1059 if (!sve_access_check(s
)) {
1063 unsigned psz
= pred_gvec_reg_size(s
);
1064 int dofs
= pred_full_reg_offset(s
, a
->rd
);
1065 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1066 int mofs
= pred_full_reg_offset(s
, a
->rm
);
1067 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1070 /* Do the operation and the flags generation in temps. */
1071 TCGv_i64 pd
= tcg_temp_new_i64();
1072 TCGv_i64 pn
= tcg_temp_new_i64();
1073 TCGv_i64 pm
= tcg_temp_new_i64();
1074 TCGv_i64 pg
= tcg_temp_new_i64();
1076 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1077 tcg_gen_ld_i64(pm
, cpu_env
, mofs
);
1078 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1080 gvec_op
->fni8(pd
, pn
, pm
, pg
);
1081 tcg_gen_st_i64(pd
, cpu_env
, dofs
);
1083 do_predtest1(pd
, pg
);
1085 tcg_temp_free_i64(pd
);
1086 tcg_temp_free_i64(pn
);
1087 tcg_temp_free_i64(pm
);
1088 tcg_temp_free_i64(pg
);
1090 /* The operation and flags generation is large. The computation
1091 * of the flags depends on the original contents of the guarding
1092 * predicate. If the destination overwrites the guarding predicate,
1093 * then the easiest way to get this right is to save a copy.
1096 if (a
->rd
== a
->pg
) {
1097 tofs
= offsetof(CPUARMState
, vfp
.preg_tmp
);
1098 tcg_gen_gvec_mov(0, tofs
, gofs
, psz
, psz
);
1101 tcg_gen_gvec_4(dofs
, nofs
, mofs
, gofs
, psz
, psz
, gvec_op
);
1102 do_predtest(s
, dofs
, tofs
, psz
/ 8);
1107 static void gen_and_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1109 tcg_gen_and_i64(pd
, pn
, pm
);
1110 tcg_gen_and_i64(pd
, pd
, pg
);
1113 static void gen_and_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1114 TCGv_vec pm
, TCGv_vec pg
)
1116 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1117 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1120 static bool trans_AND_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1122 static const GVecGen4 op
= {
1123 .fni8
= gen_and_pg_i64
,
1124 .fniv
= gen_and_pg_vec
,
1125 .fno
= gen_helper_sve_and_pppp
,
1126 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1129 return do_pppp_flags(s
, a
, &op
);
1130 } else if (a
->rn
== a
->rm
) {
1131 if (a
->pg
== a
->rn
) {
1132 return do_mov_p(s
, a
->rd
, a
->rn
);
1134 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->pg
);
1136 } else if (a
->pg
== a
->rn
|| a
->pg
== a
->rm
) {
1137 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
1139 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1143 static void gen_bic_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1145 tcg_gen_andc_i64(pd
, pn
, pm
);
1146 tcg_gen_and_i64(pd
, pd
, pg
);
1149 static void gen_bic_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1150 TCGv_vec pm
, TCGv_vec pg
)
1152 tcg_gen_andc_vec(vece
, pd
, pn
, pm
);
1153 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1156 static bool trans_BIC_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1158 static const GVecGen4 op
= {
1159 .fni8
= gen_bic_pg_i64
,
1160 .fniv
= gen_bic_pg_vec
,
1161 .fno
= gen_helper_sve_bic_pppp
,
1162 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1165 return do_pppp_flags(s
, a
, &op
);
1166 } else if (a
->pg
== a
->rn
) {
1167 return do_vector3_p(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
1169 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1173 static void gen_eor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1175 tcg_gen_xor_i64(pd
, pn
, pm
);
1176 tcg_gen_and_i64(pd
, pd
, pg
);
1179 static void gen_eor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1180 TCGv_vec pm
, TCGv_vec pg
)
1182 tcg_gen_xor_vec(vece
, pd
, pn
, pm
);
1183 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1186 static bool trans_EOR_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1188 static const GVecGen4 op
= {
1189 .fni8
= gen_eor_pg_i64
,
1190 .fniv
= gen_eor_pg_vec
,
1191 .fno
= gen_helper_sve_eor_pppp
,
1192 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1195 return do_pppp_flags(s
, a
, &op
);
1197 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1201 static void gen_sel_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1203 tcg_gen_and_i64(pn
, pn
, pg
);
1204 tcg_gen_andc_i64(pm
, pm
, pg
);
1205 tcg_gen_or_i64(pd
, pn
, pm
);
1208 static void gen_sel_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1209 TCGv_vec pm
, TCGv_vec pg
)
1211 tcg_gen_and_vec(vece
, pn
, pn
, pg
);
1212 tcg_gen_andc_vec(vece
, pm
, pm
, pg
);
1213 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1216 static bool trans_SEL_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1218 static const GVecGen4 op
= {
1219 .fni8
= gen_sel_pg_i64
,
1220 .fniv
= gen_sel_pg_vec
,
1221 .fno
= gen_helper_sve_sel_pppp
,
1222 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1227 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1231 static void gen_orr_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1233 tcg_gen_or_i64(pd
, pn
, pm
);
1234 tcg_gen_and_i64(pd
, pd
, pg
);
1237 static void gen_orr_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1238 TCGv_vec pm
, TCGv_vec pg
)
1240 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1241 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1244 static bool trans_ORR_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1246 static const GVecGen4 op
= {
1247 .fni8
= gen_orr_pg_i64
,
1248 .fniv
= gen_orr_pg_vec
,
1249 .fno
= gen_helper_sve_orr_pppp
,
1250 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1253 return do_pppp_flags(s
, a
, &op
);
1254 } else if (a
->pg
== a
->rn
&& a
->rn
== a
->rm
) {
1255 return do_mov_p(s
, a
->rd
, a
->rn
);
1257 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1261 static void gen_orn_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1263 tcg_gen_orc_i64(pd
, pn
, pm
);
1264 tcg_gen_and_i64(pd
, pd
, pg
);
1267 static void gen_orn_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1268 TCGv_vec pm
, TCGv_vec pg
)
1270 tcg_gen_orc_vec(vece
, pd
, pn
, pm
);
1271 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1274 static bool trans_ORN_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1276 static const GVecGen4 op
= {
1277 .fni8
= gen_orn_pg_i64
,
1278 .fniv
= gen_orn_pg_vec
,
1279 .fno
= gen_helper_sve_orn_pppp
,
1280 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1283 return do_pppp_flags(s
, a
, &op
);
1285 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1289 static void gen_nor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1291 tcg_gen_or_i64(pd
, pn
, pm
);
1292 tcg_gen_andc_i64(pd
, pg
, pd
);
1295 static void gen_nor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1296 TCGv_vec pm
, TCGv_vec pg
)
1298 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1299 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1302 static bool trans_NOR_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1304 static const GVecGen4 op
= {
1305 .fni8
= gen_nor_pg_i64
,
1306 .fniv
= gen_nor_pg_vec
,
1307 .fno
= gen_helper_sve_nor_pppp
,
1308 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1311 return do_pppp_flags(s
, a
, &op
);
1313 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1317 static void gen_nand_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1319 tcg_gen_and_i64(pd
, pn
, pm
);
1320 tcg_gen_andc_i64(pd
, pg
, pd
);
1323 static void gen_nand_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1324 TCGv_vec pm
, TCGv_vec pg
)
1326 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1327 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1330 static bool trans_NAND_pppp(DisasContext
*s
, arg_rprr_s
*a
)
1332 static const GVecGen4 op
= {
1333 .fni8
= gen_nand_pg_i64
,
1334 .fniv
= gen_nand_pg_vec
,
1335 .fno
= gen_helper_sve_nand_pppp
,
1336 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1339 return do_pppp_flags(s
, a
, &op
);
1341 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
/*
 *** SVE Predicate Misc Group
 */
1349 static bool trans_PTEST(DisasContext
*s
, arg_PTEST
*a
)
1351 if (sve_access_check(s
)) {
1352 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1353 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1354 int words
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1357 TCGv_i64 pn
= tcg_temp_new_i64();
1358 TCGv_i64 pg
= tcg_temp_new_i64();
1360 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1361 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1362 do_predtest1(pn
, pg
);
1364 tcg_temp_free_i64(pn
);
1365 tcg_temp_free_i64(pg
);
1367 do_predtest(s
, nofs
, gofs
, words
);
/* See the ARM pseudocode DecodePredCount.  Returns the number of
 * active elements implied by PATTERN for a vector of FULLSZ bytes
 * with elements of size 1 << ESZ bytes.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed VL patterns yield zero if the vector is too short.  */
    return elements >= bound ? bound : 0;
}
1411 /* This handles all of the predicate initialization instructions,
1412 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1413 * so that decode_pred_count returns 0. For SETFFR, we will have
1414 * set RD == 16 == FFR.
1416 static bool do_predset(DisasContext
*s
, int esz
, int rd
, int pat
, bool setflag
)
1418 if (!sve_access_check(s
)) {
1422 unsigned fullsz
= vec_full_reg_size(s
);
1423 unsigned ofs
= pred_full_reg_offset(s
, rd
);
1424 unsigned numelem
, setsz
, i
;
1425 uint64_t word
, lastword
;
1428 numelem
= decode_pred_count(fullsz
, pat
, esz
);
1430 /* Determine what we must store into each bit, and how many. */
1432 lastword
= word
= 0;
1435 setsz
= numelem
<< esz
;
1436 lastword
= word
= pred_esz_masks
[esz
];
1438 lastword
&= MAKE_64BIT_MASK(0, setsz
% 64);
1442 t
= tcg_temp_new_i64();
1444 tcg_gen_movi_i64(t
, lastword
);
1445 tcg_gen_st_i64(t
, cpu_env
, ofs
);
1449 if (word
== lastword
) {
1450 unsigned maxsz
= size_for_gvec(fullsz
/ 8);
1451 unsigned oprsz
= size_for_gvec(setsz
/ 8);
1453 if (oprsz
* 8 == setsz
) {
1454 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1462 tcg_gen_movi_i64(t
, word
);
1463 for (i
= 0; i
< QEMU_ALIGN_DOWN(setsz
, 8); i
+= 8) {
1464 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1466 if (lastword
!= word
) {
1467 tcg_gen_movi_i64(t
, lastword
);
1468 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1472 tcg_gen_movi_i64(t
, 0);
1473 for (; i
< fullsz
; i
+= 8) {
1474 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1479 tcg_temp_free_i64(t
);
1483 tcg_gen_movi_i32(cpu_NF
, -(word
!= 0));
1484 tcg_gen_movi_i32(cpu_CF
, word
== 0);
1485 tcg_gen_movi_i32(cpu_VF
, 0);
1486 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
1491 static bool trans_PTRUE(DisasContext
*s
, arg_PTRUE
*a
)
1493 return do_predset(s
, a
->esz
, a
->rd
, a
->pat
, a
->s
);
1496 static bool trans_SETFFR(DisasContext
*s
, arg_SETFFR
*a
)
1498 /* Note pat == 31 is #all, to set all elements. */
1499 return do_predset(s
, 0, FFR_PRED_NUM
, 31, false);
1502 static bool trans_PFALSE(DisasContext
*s
, arg_PFALSE
*a
)
1504 /* Note pat == 32 is #unimp, to set no elements. */
1505 return do_predset(s
, 0, a
->rd
, 32, false);
1508 static bool trans_RDFFR_p(DisasContext
*s
, arg_RDFFR_p
*a
)
1510 /* The path through do_pppp_flags is complicated enough to want to avoid
1511 * duplication. Frob the arguments into the form of a predicated AND.
1513 arg_rprr_s alt_a
= {
1514 .rd
= a
->rd
, .pg
= a
->pg
, .s
= a
->s
,
1515 .rn
= FFR_PRED_NUM
, .rm
= FFR_PRED_NUM
,
1517 return trans_AND_pppp(s
, &alt_a
);
1520 static bool trans_RDFFR(DisasContext
*s
, arg_RDFFR
*a
)
1522 return do_mov_p(s
, a
->rd
, FFR_PRED_NUM
);
1525 static bool trans_WRFFR(DisasContext
*s
, arg_WRFFR
*a
)
1527 return do_mov_p(s
, FFR_PRED_NUM
, a
->rn
);
1530 static bool do_pfirst_pnext(DisasContext
*s
, arg_rr_esz
*a
,
1531 void (*gen_fn
)(TCGv_i32
, TCGv_ptr
,
1532 TCGv_ptr
, TCGv_i32
))
1534 if (!sve_access_check(s
)) {
1538 TCGv_ptr t_pd
= tcg_temp_new_ptr();
1539 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1543 desc
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1544 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
1546 tcg_gen_addi_ptr(t_pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
1547 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
1548 t
= tcg_const_i32(desc
);
1550 gen_fn(t
, t_pd
, t_pg
, t
);
1551 tcg_temp_free_ptr(t_pd
);
1552 tcg_temp_free_ptr(t_pg
);
1555 tcg_temp_free_i32(t
);
1559 static bool trans_PFIRST(DisasContext
*s
, arg_rr_esz
*a
)
1561 return do_pfirst_pnext(s
, a
, gen_helper_sve_pfirst
);
1564 static bool trans_PNEXT(DisasContext
*s
, arg_rr_esz
*a
)
1566 return do_pfirst_pnext(s
, a
, gen_helper_sve_pnext
);
/*
 *** SVE Element Count Group
 */
1573 /* Perform an inline saturating addition of a 32-bit value within
1574 * a 64-bit register. The second operand is known to be positive,
1575 * which halves the comparisions we must perform to bound the result.
1577 static void do_sat_addsub_32(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1583 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1585 tcg_gen_ext32u_i64(reg
, reg
);
1587 tcg_gen_ext32s_i64(reg
, reg
);
1590 tcg_gen_sub_i64(reg
, reg
, val
);
1591 ibound
= (u
? 0 : INT32_MIN
);
1594 tcg_gen_add_i64(reg
, reg
, val
);
1595 ibound
= (u
? UINT32_MAX
: INT32_MAX
);
1598 bound
= tcg_const_i64(ibound
);
1599 tcg_gen_movcond_i64(cond
, reg
, reg
, bound
, bound
, reg
);
1600 tcg_temp_free_i64(bound
);
1603 /* Similarly with 64-bit values. */
1604 static void do_sat_addsub_64(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1606 TCGv_i64 t0
= tcg_temp_new_i64();
1607 TCGv_i64 t1
= tcg_temp_new_i64();
1612 tcg_gen_sub_i64(t0
, reg
, val
);
1613 tcg_gen_movi_i64(t1
, 0);
1614 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, reg
, val
, t1
, t0
);
1616 tcg_gen_add_i64(t0
, reg
, val
);
1617 tcg_gen_movi_i64(t1
, -1);
1618 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, t0
, reg
, t1
, t0
);
1622 /* Detect signed overflow for subtraction. */
1623 tcg_gen_xor_i64(t0
, reg
, val
);
1624 tcg_gen_sub_i64(t1
, reg
, val
);
1625 tcg_gen_xor_i64(reg
, reg
, t1
);
1626 tcg_gen_and_i64(t0
, t0
, reg
);
1628 /* Bound the result. */
1629 tcg_gen_movi_i64(reg
, INT64_MIN
);
1630 t2
= tcg_const_i64(0);
1631 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, reg
, t1
);
1633 /* Detect signed overflow for addition. */
1634 tcg_gen_xor_i64(t0
, reg
, val
);
1635 tcg_gen_add_i64(reg
, reg
, val
);
1636 tcg_gen_xor_i64(t1
, reg
, val
);
1637 tcg_gen_andc_i64(t0
, t1
, t0
);
1639 /* Bound the result. */
1640 tcg_gen_movi_i64(t1
, INT64_MAX
);
1641 t2
= tcg_const_i64(0);
1642 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, t1
, reg
);
1644 tcg_temp_free_i64(t2
);
1646 tcg_temp_free_i64(t0
);
1647 tcg_temp_free_i64(t1
);
1650 /* Similarly with a vector and a scalar operand. */
1651 static void do_sat_addsub_vec(DisasContext
*s
, int esz
, int rd
, int rn
,
1652 TCGv_i64 val
, bool u
, bool d
)
1654 unsigned vsz
= vec_full_reg_size(s
);
1655 TCGv_ptr dptr
, nptr
;
1659 dptr
= tcg_temp_new_ptr();
1660 nptr
= tcg_temp_new_ptr();
1661 tcg_gen_addi_ptr(dptr
, cpu_env
, vec_full_reg_offset(s
, rd
));
1662 tcg_gen_addi_ptr(nptr
, cpu_env
, vec_full_reg_offset(s
, rn
));
1663 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1667 t32
= tcg_temp_new_i32();
1668 tcg_gen_extrl_i64_i32(t32
, val
);
1670 tcg_gen_neg_i32(t32
, t32
);
1673 gen_helper_sve_uqaddi_b(dptr
, nptr
, t32
, desc
);
1675 gen_helper_sve_sqaddi_b(dptr
, nptr
, t32
, desc
);
1677 tcg_temp_free_i32(t32
);
1681 t32
= tcg_temp_new_i32();
1682 tcg_gen_extrl_i64_i32(t32
, val
);
1684 tcg_gen_neg_i32(t32
, t32
);
1687 gen_helper_sve_uqaddi_h(dptr
, nptr
, t32
, desc
);
1689 gen_helper_sve_sqaddi_h(dptr
, nptr
, t32
, desc
);
1691 tcg_temp_free_i32(t32
);
1695 t64
= tcg_temp_new_i64();
1697 tcg_gen_neg_i64(t64
, val
);
1699 tcg_gen_mov_i64(t64
, val
);
1702 gen_helper_sve_uqaddi_s(dptr
, nptr
, t64
, desc
);
1704 gen_helper_sve_sqaddi_s(dptr
, nptr
, t64
, desc
);
1706 tcg_temp_free_i64(t64
);
1712 gen_helper_sve_uqsubi_d(dptr
, nptr
, val
, desc
);
1714 gen_helper_sve_uqaddi_d(dptr
, nptr
, val
, desc
);
1717 t64
= tcg_temp_new_i64();
1718 tcg_gen_neg_i64(t64
, val
);
1719 gen_helper_sve_sqaddi_d(dptr
, nptr
, t64
, desc
);
1720 tcg_temp_free_i64(t64
);
1722 gen_helper_sve_sqaddi_d(dptr
, nptr
, val
, desc
);
1727 g_assert_not_reached();
1730 tcg_temp_free_ptr(dptr
);
1731 tcg_temp_free_ptr(nptr
);
1732 tcg_temp_free_i32(desc
);
1735 static bool trans_CNT_r(DisasContext
*s
, arg_CNT_r
*a
)
1737 if (sve_access_check(s
)) {
1738 unsigned fullsz
= vec_full_reg_size(s
);
1739 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1740 tcg_gen_movi_i64(cpu_reg(s
, a
->rd
), numelem
* a
->imm
);
1745 static bool trans_INCDEC_r(DisasContext
*s
, arg_incdec_cnt
*a
)
1747 if (sve_access_check(s
)) {
1748 unsigned fullsz
= vec_full_reg_size(s
);
1749 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1750 int inc
= numelem
* a
->imm
* (a
->d
? -1 : 1);
1751 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1753 tcg_gen_addi_i64(reg
, reg
, inc
);
1758 static bool trans_SINCDEC_r_32(DisasContext
*s
, arg_incdec_cnt
*a
)
1760 if (!sve_access_check(s
)) {
1764 unsigned fullsz
= vec_full_reg_size(s
);
1765 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1766 int inc
= numelem
* a
->imm
;
1767 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1769 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1772 tcg_gen_ext32u_i64(reg
, reg
);
1774 tcg_gen_ext32s_i64(reg
, reg
);
1777 TCGv_i64 t
= tcg_const_i64(inc
);
1778 do_sat_addsub_32(reg
, t
, a
->u
, a
->d
);
1779 tcg_temp_free_i64(t
);
1784 static bool trans_SINCDEC_r_64(DisasContext
*s
, arg_incdec_cnt
*a
)
1786 if (!sve_access_check(s
)) {
1790 unsigned fullsz
= vec_full_reg_size(s
);
1791 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1792 int inc
= numelem
* a
->imm
;
1793 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1796 TCGv_i64 t
= tcg_const_i64(inc
);
1797 do_sat_addsub_64(reg
, t
, a
->u
, a
->d
);
1798 tcg_temp_free_i64(t
);
1803 static bool trans_INCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
)
1809 unsigned fullsz
= vec_full_reg_size(s
);
1810 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1811 int inc
= numelem
* a
->imm
;
1814 if (sve_access_check(s
)) {
1815 TCGv_i64 t
= tcg_const_i64(a
->d
? -inc
: inc
);
1816 tcg_gen_gvec_adds(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
1817 vec_full_reg_offset(s
, a
->rn
),
1819 tcg_temp_free_i64(t
);
1822 do_mov_z(s
, a
->rd
, a
->rn
);
1827 static bool trans_SINCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
)
1833 unsigned fullsz
= vec_full_reg_size(s
);
1834 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1835 int inc
= numelem
* a
->imm
;
1838 if (sve_access_check(s
)) {
1839 TCGv_i64 t
= tcg_const_i64(inc
);
1840 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, t
, a
->u
, a
->d
);
1841 tcg_temp_free_i64(t
);
1844 do_mov_z(s
, a
->rd
, a
->rn
);
/*
 *** SVE Bitwise Immediate Group
 */
1853 static bool do_zz_dbm(DisasContext
*s
, arg_rr_dbm
*a
, GVecGen2iFn
*gvec_fn
)
1856 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1857 extract32(a
->dbm
, 0, 6),
1858 extract32(a
->dbm
, 6, 6))) {
1861 if (sve_access_check(s
)) {
1862 unsigned vsz
= vec_full_reg_size(s
);
1863 gvec_fn(MO_64
, vec_full_reg_offset(s
, a
->rd
),
1864 vec_full_reg_offset(s
, a
->rn
), imm
, vsz
, vsz
);
1869 static bool trans_AND_zzi(DisasContext
*s
, arg_rr_dbm
*a
)
1871 return do_zz_dbm(s
, a
, tcg_gen_gvec_andi
);
1874 static bool trans_ORR_zzi(DisasContext
*s
, arg_rr_dbm
*a
)
1876 return do_zz_dbm(s
, a
, tcg_gen_gvec_ori
);
1879 static bool trans_EOR_zzi(DisasContext
*s
, arg_rr_dbm
*a
)
1881 return do_zz_dbm(s
, a
, tcg_gen_gvec_xori
);
1884 static bool trans_DUPM(DisasContext
*s
, arg_DUPM
*a
)
1887 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1888 extract32(a
->dbm
, 0, 6),
1889 extract32(a
->dbm
, 6, 6))) {
1892 if (sve_access_check(s
)) {
1893 do_dupi_z(s
, a
->rd
, imm
);
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */
1902 /* Implement all merging copies. This is used for CPY (immediate),
1903 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1905 static void do_cpy_m(DisasContext
*s
, int esz
, int rd
, int rn
, int pg
,
1908 typedef void gen_cpy(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
1909 static gen_cpy
* const fns
[4] = {
1910 gen_helper_sve_cpy_m_b
, gen_helper_sve_cpy_m_h
,
1911 gen_helper_sve_cpy_m_s
, gen_helper_sve_cpy_m_d
,
1913 unsigned vsz
= vec_full_reg_size(s
);
1914 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1915 TCGv_ptr t_zd
= tcg_temp_new_ptr();
1916 TCGv_ptr t_zn
= tcg_temp_new_ptr();
1917 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1919 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
1920 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, rn
));
1921 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
1923 fns
[esz
](t_zd
, t_zn
, t_pg
, val
, desc
);
1925 tcg_temp_free_ptr(t_zd
);
1926 tcg_temp_free_ptr(t_zn
);
1927 tcg_temp_free_ptr(t_pg
);
1928 tcg_temp_free_i32(desc
);
1931 static bool trans_FCPY(DisasContext
*s
, arg_FCPY
*a
)
1936 if (sve_access_check(s
)) {
1937 /* Decode the VFP immediate. */
1938 uint64_t imm
= vfp_expand_imm(a
->esz
, a
->imm
);
1939 TCGv_i64 t_imm
= tcg_const_i64(imm
);
1940 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1941 tcg_temp_free_i64(t_imm
);
1946 static bool trans_CPY_m_i(DisasContext
*s
, arg_rpri_esz
*a
)
1948 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
1951 if (sve_access_check(s
)) {
1952 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1953 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1954 tcg_temp_free_i64(t_imm
);
1959 static bool trans_CPY_z_i(DisasContext
*s
, arg_CPY_z_i
*a
)
1961 static gen_helper_gvec_2i
* const fns
[4] = {
1962 gen_helper_sve_cpy_z_b
, gen_helper_sve_cpy_z_h
,
1963 gen_helper_sve_cpy_z_s
, gen_helper_sve_cpy_z_d
,
1966 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
1969 if (sve_access_check(s
)) {
1970 unsigned vsz
= vec_full_reg_size(s
);
1971 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1972 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
1973 pred_full_reg_offset(s
, a
->pg
),
1974 t_imm
, vsz
, vsz
, 0, fns
[a
->esz
]);
1975 tcg_temp_free_i64(t_imm
);
/*
 *** SVE Permute Extract Group
 */
1984 static bool trans_EXT(DisasContext
*s
, arg_EXT
*a
)
1986 if (!sve_access_check(s
)) {
1990 unsigned vsz
= vec_full_reg_size(s
);
1991 unsigned n_ofs
= a
->imm
>= vsz
? 0 : a
->imm
;
1992 unsigned n_siz
= vsz
- n_ofs
;
1993 unsigned d
= vec_full_reg_offset(s
, a
->rd
);
1994 unsigned n
= vec_full_reg_offset(s
, a
->rn
);
1995 unsigned m
= vec_full_reg_offset(s
, a
->rm
);
1997 /* Use host vector move insns if we have appropriate sizes
1998 * and no unfortunate overlap.
2001 && n_ofs
== size_for_gvec(n_ofs
)
2002 && n_siz
== size_for_gvec(n_siz
)
2003 && (d
!= n
|| n_siz
<= n_ofs
)) {
2004 tcg_gen_gvec_mov(0, d
, n
+ n_ofs
, n_siz
, n_siz
);
2006 tcg_gen_gvec_mov(0, d
+ n_siz
, m
, n_ofs
, n_ofs
);
2009 tcg_gen_gvec_3_ool(d
, n
, m
, vsz
, vsz
, n_ofs
, gen_helper_sve_ext
);
/*
 *** SVE Permute - Unpredicated Group
 */
2018 static bool trans_DUP_s(DisasContext
*s
, arg_DUP_s
*a
)
2020 if (sve_access_check(s
)) {
2021 unsigned vsz
= vec_full_reg_size(s
);
2022 tcg_gen_gvec_dup_i64(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
2023 vsz
, vsz
, cpu_reg_sp(s
, a
->rn
));
2028 static bool trans_DUP_x(DisasContext
*s
, arg_DUP_x
*a
)
2030 if ((a
->imm
& 0x1f) == 0) {
2033 if (sve_access_check(s
)) {
2034 unsigned vsz
= vec_full_reg_size(s
);
2035 unsigned dofs
= vec_full_reg_offset(s
, a
->rd
);
2036 unsigned esz
, index
;
2038 esz
= ctz32(a
->imm
);
2039 index
= a
->imm
>> (esz
+ 1);
2041 if ((index
<< esz
) < vsz
) {
2042 unsigned nofs
= vec_reg_offset(s
, a
->rn
, index
, esz
);
2043 tcg_gen_gvec_dup_mem(esz
, dofs
, nofs
, vsz
, vsz
);
2045 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, 0);
2051 static void do_insr_i64(DisasContext
*s
, arg_rrr_esz
*a
, TCGv_i64 val
)
2053 typedef void gen_insr(TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
2054 static gen_insr
* const fns
[4] = {
2055 gen_helper_sve_insr_b
, gen_helper_sve_insr_h
,
2056 gen_helper_sve_insr_s
, gen_helper_sve_insr_d
,
2058 unsigned vsz
= vec_full_reg_size(s
);
2059 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2060 TCGv_ptr t_zd
= tcg_temp_new_ptr();
2061 TCGv_ptr t_zn
= tcg_temp_new_ptr();
2063 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, a
->rd
));
2064 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2066 fns
[a
->esz
](t_zd
, t_zn
, val
, desc
);
2068 tcg_temp_free_ptr(t_zd
);
2069 tcg_temp_free_ptr(t_zn
);
2070 tcg_temp_free_i32(desc
);
2073 static bool trans_INSR_f(DisasContext
*s
, arg_rrr_esz
*a
)
2075 if (sve_access_check(s
)) {
2076 TCGv_i64 t
= tcg_temp_new_i64();
2077 tcg_gen_ld_i64(t
, cpu_env
, vec_reg_offset(s
, a
->rm
, 0, MO_64
));
2078 do_insr_i64(s
, a
, t
);
2079 tcg_temp_free_i64(t
);
2084 static bool trans_INSR_r(DisasContext
*s
, arg_rrr_esz
*a
)
2086 if (sve_access_check(s
)) {
2087 do_insr_i64(s
, a
, cpu_reg(s
, a
->rm
));
2092 static bool trans_REV_v(DisasContext
*s
, arg_rr_esz
*a
)
2094 static gen_helper_gvec_2
* const fns
[4] = {
2095 gen_helper_sve_rev_b
, gen_helper_sve_rev_h
,
2096 gen_helper_sve_rev_s
, gen_helper_sve_rev_d
2099 if (sve_access_check(s
)) {
2100 unsigned vsz
= vec_full_reg_size(s
);
2101 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
2102 vec_full_reg_offset(s
, a
->rn
),
2103 vsz
, vsz
, 0, fns
[a
->esz
]);
2108 static bool trans_TBL(DisasContext
*s
, arg_rrr_esz
*a
)
2110 static gen_helper_gvec_3
* const fns
[4] = {
2111 gen_helper_sve_tbl_b
, gen_helper_sve_tbl_h
,
2112 gen_helper_sve_tbl_s
, gen_helper_sve_tbl_d
2115 if (sve_access_check(s
)) {
2116 unsigned vsz
= vec_full_reg_size(s
);
2117 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2118 vec_full_reg_offset(s
, a
->rn
),
2119 vec_full_reg_offset(s
, a
->rm
),
2120 vsz
, vsz
, 0, fns
[a
->esz
]);
2125 static bool trans_UNPK(DisasContext
*s
, arg_UNPK
*a
)
2127 static gen_helper_gvec_2
* const fns
[4][2] = {
2129 { gen_helper_sve_sunpk_h
, gen_helper_sve_uunpk_h
},
2130 { gen_helper_sve_sunpk_s
, gen_helper_sve_uunpk_s
},
2131 { gen_helper_sve_sunpk_d
, gen_helper_sve_uunpk_d
},
2137 if (sve_access_check(s
)) {
2138 unsigned vsz
= vec_full_reg_size(s
);
2139 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
2140 vec_full_reg_offset(s
, a
->rn
)
2141 + (a
->h
? vsz
/ 2 : 0),
2142 vsz
, vsz
, 0, fns
[a
->esz
][a
->u
]);
/*
 *** SVE Permute - Predicates Group
 */
2151 static bool do_perm_pred3(DisasContext
*s
, arg_rrr_esz
*a
, bool high_odd
,
2152 gen_helper_gvec_3
*fn
)
2154 if (!sve_access_check(s
)) {
2158 unsigned vsz
= pred_full_reg_size(s
);
2160 /* Predicate sizes may be smaller and cannot use simd_desc.
2161 We cannot round up, as we do elsewhere, because we need
2162 the exact size for ZIP2 and REV. We retain the style for
2163 the other helpers for consistency. */
2164 TCGv_ptr t_d
= tcg_temp_new_ptr();
2165 TCGv_ptr t_n
= tcg_temp_new_ptr();
2166 TCGv_ptr t_m
= tcg_temp_new_ptr();
2171 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
2172 desc
= deposit32(desc
, SIMD_DATA_SHIFT
+ 2, 2, high_odd
);
2174 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2175 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2176 tcg_gen_addi_ptr(t_m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2177 t_desc
= tcg_const_i32(desc
);
2179 fn(t_d
, t_n
, t_m
, t_desc
);
2181 tcg_temp_free_ptr(t_d
);
2182 tcg_temp_free_ptr(t_n
);
2183 tcg_temp_free_ptr(t_m
);
2184 tcg_temp_free_i32(t_desc
);
2188 static bool do_perm_pred2(DisasContext
*s
, arg_rr_esz
*a
, bool high_odd
,
2189 gen_helper_gvec_2
*fn
)
2191 if (!sve_access_check(s
)) {
2195 unsigned vsz
= pred_full_reg_size(s
);
2196 TCGv_ptr t_d
= tcg_temp_new_ptr();
2197 TCGv_ptr t_n
= tcg_temp_new_ptr();
2201 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2202 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2204 /* Predicate sizes may be smaller and cannot use simd_desc.
2205 We cannot round up, as we do elsewhere, because we need
2206 the exact size for ZIP2 and REV. We retain the style for
2207 the other helpers for consistency. */
2210 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
2211 desc
= deposit32(desc
, SIMD_DATA_SHIFT
+ 2, 2, high_odd
);
2212 t_desc
= tcg_const_i32(desc
);
2214 fn(t_d
, t_n
, t_desc
);
2216 tcg_temp_free_i32(t_desc
);
2217 tcg_temp_free_ptr(t_d
);
2218 tcg_temp_free_ptr(t_n
);
2222 static bool trans_ZIP1_p(DisasContext
*s
, arg_rrr_esz
*a
)
2224 return do_perm_pred3(s
, a
, 0, gen_helper_sve_zip_p
);
2227 static bool trans_ZIP2_p(DisasContext
*s
, arg_rrr_esz
*a
)
2229 return do_perm_pred3(s
, a
, 1, gen_helper_sve_zip_p
);
2232 static bool trans_UZP1_p(DisasContext
*s
, arg_rrr_esz
*a
)
2234 return do_perm_pred3(s
, a
, 0, gen_helper_sve_uzp_p
);
2237 static bool trans_UZP2_p(DisasContext
*s
, arg_rrr_esz
*a
)
2239 return do_perm_pred3(s
, a
, 1, gen_helper_sve_uzp_p
);
2242 static bool trans_TRN1_p(DisasContext
*s
, arg_rrr_esz
*a
)
2244 return do_perm_pred3(s
, a
, 0, gen_helper_sve_trn_p
);
2247 static bool trans_TRN2_p(DisasContext
*s
, arg_rrr_esz
*a
)
2249 return do_perm_pred3(s
, a
, 1, gen_helper_sve_trn_p
);
2252 static bool trans_REV_p(DisasContext
*s
, arg_rr_esz
*a
)
2254 return do_perm_pred2(s
, a
, 0, gen_helper_sve_rev_p
);
2257 static bool trans_PUNPKLO(DisasContext
*s
, arg_PUNPKLO
*a
)
2259 return do_perm_pred2(s
, a
, 0, gen_helper_sve_punpk_p
);
2262 static bool trans_PUNPKHI(DisasContext
*s
, arg_PUNPKHI
*a
)
2264 return do_perm_pred2(s
, a
, 1, gen_helper_sve_punpk_p
);
/*
 *** SVE Permute - Interleaving Group
 */
2271 static bool do_zip(DisasContext
*s
, arg_rrr_esz
*a
, bool high
)
2273 static gen_helper_gvec_3
* const fns
[4] = {
2274 gen_helper_sve_zip_b
, gen_helper_sve_zip_h
,
2275 gen_helper_sve_zip_s
, gen_helper_sve_zip_d
,
2278 if (sve_access_check(s
)) {
2279 unsigned vsz
= vec_full_reg_size(s
);
2280 unsigned high_ofs
= high
? vsz
/ 2 : 0;
2281 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2282 vec_full_reg_offset(s
, a
->rn
) + high_ofs
,
2283 vec_full_reg_offset(s
, a
->rm
) + high_ofs
,
2284 vsz
, vsz
, 0, fns
[a
->esz
]);
2289 static bool do_zzz_data_ool(DisasContext
*s
, arg_rrr_esz
*a
, int data
,
2290 gen_helper_gvec_3
*fn
)
2292 if (sve_access_check(s
)) {
2293 unsigned vsz
= vec_full_reg_size(s
);
2294 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2295 vec_full_reg_offset(s
, a
->rn
),
2296 vec_full_reg_offset(s
, a
->rm
),
2297 vsz
, vsz
, data
, fn
);
2302 static bool trans_ZIP1_z(DisasContext
*s
, arg_rrr_esz
*a
)
2304 return do_zip(s
, a
, false);
2307 static bool trans_ZIP2_z(DisasContext
*s
, arg_rrr_esz
*a
)
2309 return do_zip(s
, a
, true);
2312 static gen_helper_gvec_3
* const uzp_fns
[4] = {
2313 gen_helper_sve_uzp_b
, gen_helper_sve_uzp_h
,
2314 gen_helper_sve_uzp_s
, gen_helper_sve_uzp_d
,
2317 static bool trans_UZP1_z(DisasContext
*s
, arg_rrr_esz
*a
)
2319 return do_zzz_data_ool(s
, a
, 0, uzp_fns
[a
->esz
]);
2322 static bool trans_UZP2_z(DisasContext
*s
, arg_rrr_esz
*a
)
2324 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, uzp_fns
[a
->esz
]);
2327 static gen_helper_gvec_3
* const trn_fns
[4] = {
2328 gen_helper_sve_trn_b
, gen_helper_sve_trn_h
,
2329 gen_helper_sve_trn_s
, gen_helper_sve_trn_d
,
2332 static bool trans_TRN1_z(DisasContext
*s
, arg_rrr_esz
*a
)
2334 return do_zzz_data_ool(s
, a
, 0, trn_fns
[a
->esz
]);
2337 static bool trans_TRN2_z(DisasContext
*s
, arg_rrr_esz
*a
)
2339 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, trn_fns
[a
->esz
]);
/*
 *** SVE Permute Vector - Predicated Group
 */
2346 static bool trans_COMPACT(DisasContext
*s
, arg_rpr_esz
*a
)
2348 static gen_helper_gvec_3
* const fns
[4] = {
2349 NULL
, NULL
, gen_helper_sve_compact_s
, gen_helper_sve_compact_d
2351 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2354 /* Call the helper that computes the ARM LastActiveElement pseudocode
2355 * function, scaled by the element size. This includes the not found
2356 * indication; e.g. not found for esz=3 is -8.
2358 static void find_last_active(DisasContext
*s
, TCGv_i32 ret
, int esz
, int pg
)
2360 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2361 * round up, as we do elsewhere, because we need the exact size.
2363 TCGv_ptr t_p
= tcg_temp_new_ptr();
2365 unsigned vsz
= pred_full_reg_size(s
);
2369 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
2371 tcg_gen_addi_ptr(t_p
, cpu_env
, pred_full_reg_offset(s
, pg
));
2372 t_desc
= tcg_const_i32(desc
);
2374 gen_helper_sve_last_active_element(ret
, t_p
, t_desc
);
2376 tcg_temp_free_i32(t_desc
);
2377 tcg_temp_free_ptr(t_p
);
2380 /* Increment LAST to the offset of the next element in the vector,
2381 * wrapping around to 0.
2383 static void incr_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2385 unsigned vsz
= vec_full_reg_size(s
);
2387 tcg_gen_addi_i32(last
, last
, 1 << esz
);
2388 if (is_power_of_2(vsz
)) {
2389 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2391 TCGv_i32 max
= tcg_const_i32(vsz
);
2392 TCGv_i32 zero
= tcg_const_i32(0);
2393 tcg_gen_movcond_i32(TCG_COND_GEU
, last
, last
, max
, zero
, last
);
2394 tcg_temp_free_i32(max
);
2395 tcg_temp_free_i32(zero
);
2399 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2400 static void wrap_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2402 unsigned vsz
= vec_full_reg_size(s
);
2404 if (is_power_of_2(vsz
)) {
2405 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2407 TCGv_i32 max
= tcg_const_i32(vsz
- (1 << esz
));
2408 TCGv_i32 zero
= tcg_const_i32(0);
2409 tcg_gen_movcond_i32(TCG_COND_LT
, last
, last
, zero
, max
, last
);
2410 tcg_temp_free_i32(max
);
2411 tcg_temp_free_i32(zero
);
2415 /* Load an unsigned element of ESZ from BASE+OFS. */
2416 static TCGv_i64
load_esz(TCGv_ptr base
, int ofs
, int esz
)
2418 TCGv_i64 r
= tcg_temp_new_i64();
2422 tcg_gen_ld8u_i64(r
, base
, ofs
);
2425 tcg_gen_ld16u_i64(r
, base
, ofs
);
2428 tcg_gen_ld32u_i64(r
, base
, ofs
);
2431 tcg_gen_ld_i64(r
, base
, ofs
);
2434 g_assert_not_reached();
2439 /* Load an unsigned element of ESZ from RM[LAST]. */
2440 static TCGv_i64
load_last_active(DisasContext
*s
, TCGv_i32 last
,
2443 TCGv_ptr p
= tcg_temp_new_ptr();
2446 /* Convert offset into vector into offset into ENV.
2447 * The final adjustment for the vector register base
2448 * is added via constant offset to the load.
2450 #ifdef HOST_WORDS_BIGENDIAN
2451 /* Adjust for element ordering. See vec_reg_offset. */
2453 tcg_gen_xori_i32(last
, last
, 8 - (1 << esz
));
2456 tcg_gen_ext_i32_ptr(p
, last
);
2457 tcg_gen_add_ptr(p
, p
, cpu_env
);
2459 r
= load_esz(p
, vec_full_reg_offset(s
, rm
), esz
);
2460 tcg_temp_free_ptr(p
);
2465 /* Compute CLAST for a Zreg. */
2466 static bool do_clast_vector(DisasContext
*s
, arg_rprr_esz
*a
, bool before
)
2471 unsigned vsz
, esz
= a
->esz
;
2473 if (!sve_access_check(s
)) {
2477 last
= tcg_temp_local_new_i32();
2478 over
= gen_new_label();
2480 find_last_active(s
, last
, esz
, a
->pg
);
2482 /* There is of course no movcond for a 2048-bit vector,
2483 * so we must branch over the actual store.
2485 tcg_gen_brcondi_i32(TCG_COND_LT
, last
, 0, over
);
2488 incr_last_active(s
, last
, esz
);
2491 ele
= load_last_active(s
, last
, a
->rm
, esz
);
2492 tcg_temp_free_i32(last
);
2494 vsz
= vec_full_reg_size(s
);
2495 tcg_gen_gvec_dup_i64(esz
, vec_full_reg_offset(s
, a
->rd
), vsz
, vsz
, ele
);
2496 tcg_temp_free_i64(ele
);
2498 /* If this insn used MOVPRFX, we may need a second move. */
2499 if (a
->rd
!= a
->rn
) {
2500 TCGLabel
*done
= gen_new_label();
2503 gen_set_label(over
);
2504 do_mov_z(s
, a
->rd
, a
->rn
);
2506 gen_set_label(done
);
2508 gen_set_label(over
);
2513 static bool trans_CLASTA_z(DisasContext
*s
, arg_rprr_esz
*a
)
2515 return do_clast_vector(s
, a
, false);
2518 static bool trans_CLASTB_z(DisasContext
*s
, arg_rprr_esz
*a
)
2520 return do_clast_vector(s
, a
, true);
2523 /* Compute CLAST for a scalar. */
2524 static void do_clast_scalar(DisasContext
*s
, int esz
, int pg
, int rm
,
2525 bool before
, TCGv_i64 reg_val
)
2527 TCGv_i32 last
= tcg_temp_new_i32();
2528 TCGv_i64 ele
, cmp
, zero
;
2530 find_last_active(s
, last
, esz
, pg
);
2532 /* Extend the original value of last prior to incrementing. */
2533 cmp
= tcg_temp_new_i64();
2534 tcg_gen_ext_i32_i64(cmp
, last
);
2537 incr_last_active(s
, last
, esz
);
2540 /* The conceit here is that while last < 0 indicates not found, after
2541 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2542 * from which we can load garbage. We then discard the garbage with
2543 * a conditional move.
2545 ele
= load_last_active(s
, last
, rm
, esz
);
2546 tcg_temp_free_i32(last
);
2548 zero
= tcg_const_i64(0);
2549 tcg_gen_movcond_i64(TCG_COND_GE
, reg_val
, cmp
, zero
, ele
, reg_val
);
2551 tcg_temp_free_i64(zero
);
2552 tcg_temp_free_i64(cmp
);
2553 tcg_temp_free_i64(ele
);
2556 /* Compute CLAST for a Vreg. */
2557 static bool do_clast_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2559 if (sve_access_check(s
)) {
2561 int ofs
= vec_reg_offset(s
, a
->rd
, 0, esz
);
2562 TCGv_i64 reg
= load_esz(cpu_env
, ofs
, esz
);
2564 do_clast_scalar(s
, esz
, a
->pg
, a
->rn
, before
, reg
);
2565 write_fp_dreg(s
, a
->rd
, reg
);
2566 tcg_temp_free_i64(reg
);
2571 static bool trans_CLASTA_v(DisasContext
*s
, arg_rpr_esz
*a
)
2573 return do_clast_fp(s
, a
, false);
2576 static bool trans_CLASTB_v(DisasContext
*s
, arg_rpr_esz
*a
)
2578 return do_clast_fp(s
, a
, true);
2581 /* Compute CLAST for a Xreg. */
2582 static bool do_clast_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2586 if (!sve_access_check(s
)) {
2590 reg
= cpu_reg(s
, a
->rd
);
2593 tcg_gen_ext8u_i64(reg
, reg
);
2596 tcg_gen_ext16u_i64(reg
, reg
);
2599 tcg_gen_ext32u_i64(reg
, reg
);
2604 g_assert_not_reached();
2607 do_clast_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
, reg
);
2611 static bool trans_CLASTA_r(DisasContext
*s
, arg_rpr_esz
*a
)
2613 return do_clast_general(s
, a
, false);
2616 static bool trans_CLASTB_r(DisasContext
*s
, arg_rpr_esz
*a
)
2618 return do_clast_general(s
, a
, true);
2621 /* Compute LAST for a scalar. */
2622 static TCGv_i64
do_last_scalar(DisasContext
*s
, int esz
,
2623 int pg
, int rm
, bool before
)
2625 TCGv_i32 last
= tcg_temp_new_i32();
2628 find_last_active(s
, last
, esz
, pg
);
2630 wrap_last_active(s
, last
, esz
);
2632 incr_last_active(s
, last
, esz
);
2635 ret
= load_last_active(s
, last
, rm
, esz
);
2636 tcg_temp_free_i32(last
);
2640 /* Compute LAST for a Vreg. */
2641 static bool do_last_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2643 if (sve_access_check(s
)) {
2644 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2645 write_fp_dreg(s
, a
->rd
, val
);
2646 tcg_temp_free_i64(val
);
2651 static bool trans_LASTA_v(DisasContext
*s
, arg_rpr_esz
*a
)
2653 return do_last_fp(s
, a
, false);
2656 static bool trans_LASTB_v(DisasContext
*s
, arg_rpr_esz
*a
)
2658 return do_last_fp(s
, a
, true);
2661 /* Compute LAST for a Xreg. */
2662 static bool do_last_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2664 if (sve_access_check(s
)) {
2665 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2666 tcg_gen_mov_i64(cpu_reg(s
, a
->rd
), val
);
2667 tcg_temp_free_i64(val
);
2672 static bool trans_LASTA_r(DisasContext
*s
, arg_rpr_esz
*a
)
2674 return do_last_general(s
, a
, false);
2677 static bool trans_LASTB_r(DisasContext
*s
, arg_rpr_esz
*a
)
2679 return do_last_general(s
, a
, true);
2682 static bool trans_CPY_m_r(DisasContext
*s
, arg_rpr_esz
*a
)
2684 if (sve_access_check(s
)) {
2685 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, cpu_reg_sp(s
, a
->rn
));
2690 static bool trans_CPY_m_v(DisasContext
*s
, arg_rpr_esz
*a
)
2692 if (sve_access_check(s
)) {
2693 int ofs
= vec_reg_offset(s
, a
->rn
, 0, a
->esz
);
2694 TCGv_i64 t
= load_esz(cpu_env
, ofs
, a
->esz
);
2695 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, t
);
2696 tcg_temp_free_i64(t
);
2701 static bool trans_REVB(DisasContext
*s
, arg_rpr_esz
*a
)
2703 static gen_helper_gvec_3
* const fns
[4] = {
2705 gen_helper_sve_revb_h
,
2706 gen_helper_sve_revb_s
,
2707 gen_helper_sve_revb_d
,
2709 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2712 static bool trans_REVH(DisasContext
*s
, arg_rpr_esz
*a
)
2714 static gen_helper_gvec_3
* const fns
[4] = {
2717 gen_helper_sve_revh_s
,
2718 gen_helper_sve_revh_d
,
2720 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2723 static bool trans_REVW(DisasContext
*s
, arg_rpr_esz
*a
)
2725 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_revw_d
: NULL
);
2728 static bool trans_RBIT(DisasContext
*s
, arg_rpr_esz
*a
)
2730 static gen_helper_gvec_3
* const fns
[4] = {
2731 gen_helper_sve_rbit_b
,
2732 gen_helper_sve_rbit_h
,
2733 gen_helper_sve_rbit_s
,
2734 gen_helper_sve_rbit_d
,
2736 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2739 static bool trans_SPLICE(DisasContext
*s
, arg_rprr_esz
*a
)
2741 if (sve_access_check(s
)) {
2742 unsigned vsz
= vec_full_reg_size(s
);
2743 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
2744 vec_full_reg_offset(s
, a
->rn
),
2745 vec_full_reg_offset(s
, a
->rm
),
2746 pred_full_reg_offset(s
, a
->pg
),
2747 vsz
, vsz
, a
->esz
, gen_helper_sve_splice
);
2753 *** SVE Integer Compare - Vectors Group
2756 static bool do_ppzz_flags(DisasContext
*s
, arg_rprr_esz
*a
,
2757 gen_helper_gvec_flags_4
*gen_fn
)
2759 TCGv_ptr pd
, zn
, zm
, pg
;
2763 if (gen_fn
== NULL
) {
2766 if (!sve_access_check(s
)) {
2770 vsz
= vec_full_reg_size(s
);
2771 t
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2772 pd
= tcg_temp_new_ptr();
2773 zn
= tcg_temp_new_ptr();
2774 zm
= tcg_temp_new_ptr();
2775 pg
= tcg_temp_new_ptr();
2777 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2778 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2779 tcg_gen_addi_ptr(zm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
2780 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2782 gen_fn(t
, pd
, zn
, zm
, pg
, t
);
2784 tcg_temp_free_ptr(pd
);
2785 tcg_temp_free_ptr(zn
);
2786 tcg_temp_free_ptr(zm
);
2787 tcg_temp_free_ptr(pg
);
2791 tcg_temp_free_i32(t
);
2795 #define DO_PPZZ(NAME, name) \
2796 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
2798 static gen_helper_gvec_flags_4 * const fns[4] = { \
2799 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2800 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2802 return do_ppzz_flags(s, a, fns[a->esz]); \
2805 DO_PPZZ(CMPEQ
, cmpeq
)
2806 DO_PPZZ(CMPNE
, cmpne
)
2807 DO_PPZZ(CMPGT
, cmpgt
)
2808 DO_PPZZ(CMPGE
, cmpge
)
2809 DO_PPZZ(CMPHI
, cmphi
)
2810 DO_PPZZ(CMPHS
, cmphs
)
2814 #define DO_PPZW(NAME, name) \
2815 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
2817 static gen_helper_gvec_flags_4 * const fns[4] = { \
2818 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2819 gen_helper_sve_##name##_ppzw_s, NULL \
2821 return do_ppzz_flags(s, a, fns[a->esz]); \
2824 DO_PPZW(CMPEQ
, cmpeq
)
2825 DO_PPZW(CMPNE
, cmpne
)
2826 DO_PPZW(CMPGT
, cmpgt
)
2827 DO_PPZW(CMPGE
, cmpge
)
2828 DO_PPZW(CMPHI
, cmphi
)
2829 DO_PPZW(CMPHS
, cmphs
)
2830 DO_PPZW(CMPLT
, cmplt
)
2831 DO_PPZW(CMPLE
, cmple
)
2832 DO_PPZW(CMPLO
, cmplo
)
2833 DO_PPZW(CMPLS
, cmpls
)
2838 *** SVE Integer Compare - Immediate Groups
2841 static bool do_ppzi_flags(DisasContext
*s
, arg_rpri_esz
*a
,
2842 gen_helper_gvec_flags_3
*gen_fn
)
2844 TCGv_ptr pd
, zn
, pg
;
2848 if (gen_fn
== NULL
) {
2851 if (!sve_access_check(s
)) {
2855 vsz
= vec_full_reg_size(s
);
2856 t
= tcg_const_i32(simd_desc(vsz
, vsz
, a
->imm
));
2857 pd
= tcg_temp_new_ptr();
2858 zn
= tcg_temp_new_ptr();
2859 pg
= tcg_temp_new_ptr();
2861 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2862 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2863 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2865 gen_fn(t
, pd
, zn
, pg
, t
);
2867 tcg_temp_free_ptr(pd
);
2868 tcg_temp_free_ptr(zn
);
2869 tcg_temp_free_ptr(pg
);
2873 tcg_temp_free_i32(t
);
2877 #define DO_PPZI(NAME, name) \
2878 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
2880 static gen_helper_gvec_flags_3 * const fns[4] = { \
2881 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2882 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2884 return do_ppzi_flags(s, a, fns[a->esz]); \
2887 DO_PPZI(CMPEQ
, cmpeq
)
2888 DO_PPZI(CMPNE
, cmpne
)
2889 DO_PPZI(CMPGT
, cmpgt
)
2890 DO_PPZI(CMPGE
, cmpge
)
2891 DO_PPZI(CMPHI
, cmphi
)
2892 DO_PPZI(CMPHS
, cmphs
)
2893 DO_PPZI(CMPLT
, cmplt
)
2894 DO_PPZI(CMPLE
, cmple
)
2895 DO_PPZI(CMPLO
, cmplo
)
2896 DO_PPZI(CMPLS
, cmpls
)
2901 *** SVE Partition Break Group
2904 static bool do_brk3(DisasContext
*s
, arg_rprr_s
*a
,
2905 gen_helper_gvec_4
*fn
, gen_helper_gvec_flags_4
*fn_s
)
2907 if (!sve_access_check(s
)) {
2911 unsigned vsz
= pred_full_reg_size(s
);
2913 /* Predicate sizes may be smaller and cannot use simd_desc. */
2914 TCGv_ptr d
= tcg_temp_new_ptr();
2915 TCGv_ptr n
= tcg_temp_new_ptr();
2916 TCGv_ptr m
= tcg_temp_new_ptr();
2917 TCGv_ptr g
= tcg_temp_new_ptr();
2918 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2920 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2921 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2922 tcg_gen_addi_ptr(m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2923 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2926 fn_s(t
, d
, n
, m
, g
, t
);
2931 tcg_temp_free_ptr(d
);
2932 tcg_temp_free_ptr(n
);
2933 tcg_temp_free_ptr(m
);
2934 tcg_temp_free_ptr(g
);
2935 tcg_temp_free_i32(t
);
2939 static bool do_brk2(DisasContext
*s
, arg_rpr_s
*a
,
2940 gen_helper_gvec_3
*fn
, gen_helper_gvec_flags_3
*fn_s
)
2942 if (!sve_access_check(s
)) {
2946 unsigned vsz
= pred_full_reg_size(s
);
2948 /* Predicate sizes may be smaller and cannot use simd_desc. */
2949 TCGv_ptr d
= tcg_temp_new_ptr();
2950 TCGv_ptr n
= tcg_temp_new_ptr();
2951 TCGv_ptr g
= tcg_temp_new_ptr();
2952 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2954 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2955 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2956 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2959 fn_s(t
, d
, n
, g
, t
);
2964 tcg_temp_free_ptr(d
);
2965 tcg_temp_free_ptr(n
);
2966 tcg_temp_free_ptr(g
);
2967 tcg_temp_free_i32(t
);
2971 static bool trans_BRKPA(DisasContext
*s
, arg_rprr_s
*a
)
2973 return do_brk3(s
, a
, gen_helper_sve_brkpa
, gen_helper_sve_brkpas
);
2976 static bool trans_BRKPB(DisasContext
*s
, arg_rprr_s
*a
)
2978 return do_brk3(s
, a
, gen_helper_sve_brkpb
, gen_helper_sve_brkpbs
);
2981 static bool trans_BRKA_m(DisasContext
*s
, arg_rpr_s
*a
)
2983 return do_brk2(s
, a
, gen_helper_sve_brka_m
, gen_helper_sve_brkas_m
);
2986 static bool trans_BRKB_m(DisasContext
*s
, arg_rpr_s
*a
)
2988 return do_brk2(s
, a
, gen_helper_sve_brkb_m
, gen_helper_sve_brkbs_m
);
2991 static bool trans_BRKA_z(DisasContext
*s
, arg_rpr_s
*a
)
2993 return do_brk2(s
, a
, gen_helper_sve_brka_z
, gen_helper_sve_brkas_z
);
2996 static bool trans_BRKB_z(DisasContext
*s
, arg_rpr_s
*a
)
2998 return do_brk2(s
, a
, gen_helper_sve_brkb_z
, gen_helper_sve_brkbs_z
);
3001 static bool trans_BRKN(DisasContext
*s
, arg_rpr_s
*a
)
3003 return do_brk2(s
, a
, gen_helper_sve_brkn
, gen_helper_sve_brkns
);
3007 *** SVE Predicate Count Group
3010 static void do_cntp(DisasContext
*s
, TCGv_i64 val
, int esz
, int pn
, int pg
)
3012 unsigned psz
= pred_full_reg_size(s
);
3017 tcg_gen_ld_i64(val
, cpu_env
, pred_full_reg_offset(s
, pn
));
3019 TCGv_i64 g
= tcg_temp_new_i64();
3020 tcg_gen_ld_i64(g
, cpu_env
, pred_full_reg_offset(s
, pg
));
3021 tcg_gen_and_i64(val
, val
, g
);
3022 tcg_temp_free_i64(g
);
3025 /* Reduce the pred_esz_masks value simply to reduce the
3026 * size of the code generated here.
3028 psz_mask
= MAKE_64BIT_MASK(0, psz
* 8);
3029 tcg_gen_andi_i64(val
, val
, pred_esz_masks
[esz
] & psz_mask
);
3031 tcg_gen_ctpop_i64(val
, val
);
3033 TCGv_ptr t_pn
= tcg_temp_new_ptr();
3034 TCGv_ptr t_pg
= tcg_temp_new_ptr();
3039 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
3041 tcg_gen_addi_ptr(t_pn
, cpu_env
, pred_full_reg_offset(s
, pn
));
3042 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
3043 t_desc
= tcg_const_i32(desc
);
3045 gen_helper_sve_cntp(val
, t_pn
, t_pg
, t_desc
);
3046 tcg_temp_free_ptr(t_pn
);
3047 tcg_temp_free_ptr(t_pg
);
3048 tcg_temp_free_i32(t_desc
);
3052 static bool trans_CNTP(DisasContext
*s
, arg_CNTP
*a
)
3054 if (sve_access_check(s
)) {
3055 do_cntp(s
, cpu_reg(s
, a
->rd
), a
->esz
, a
->rn
, a
->pg
);
3060 static bool trans_INCDECP_r(DisasContext
*s
, arg_incdec_pred
*a
)
3062 if (sve_access_check(s
)) {
3063 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3064 TCGv_i64 val
= tcg_temp_new_i64();
3066 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3068 tcg_gen_sub_i64(reg
, reg
, val
);
3070 tcg_gen_add_i64(reg
, reg
, val
);
3072 tcg_temp_free_i64(val
);
3077 static bool trans_INCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
)
3082 if (sve_access_check(s
)) {
3083 unsigned vsz
= vec_full_reg_size(s
);
3084 TCGv_i64 val
= tcg_temp_new_i64();
3085 GVecGen2sFn
*gvec_fn
= a
->d
? tcg_gen_gvec_subs
: tcg_gen_gvec_adds
;
3087 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3088 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3089 vec_full_reg_offset(s
, a
->rn
), val
, vsz
, vsz
);
3094 static bool trans_SINCDECP_r_32(DisasContext
*s
, arg_incdec_pred
*a
)
3096 if (sve_access_check(s
)) {
3097 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3098 TCGv_i64 val
= tcg_temp_new_i64();
3100 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3101 do_sat_addsub_32(reg
, val
, a
->u
, a
->d
);
3106 static bool trans_SINCDECP_r_64(DisasContext
*s
, arg_incdec_pred
*a
)
3108 if (sve_access_check(s
)) {
3109 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3110 TCGv_i64 val
= tcg_temp_new_i64();
3112 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3113 do_sat_addsub_64(reg
, val
, a
->u
, a
->d
);
3118 static bool trans_SINCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
)
3123 if (sve_access_check(s
)) {
3124 TCGv_i64 val
= tcg_temp_new_i64();
3125 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3126 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, a
->u
, a
->d
);
3132 *** SVE Integer Compare Scalars Group
3135 static bool trans_CTERM(DisasContext
*s
, arg_CTERM
*a
)
3137 if (!sve_access_check(s
)) {
3141 TCGCond cond
= (a
->ne
? TCG_COND_NE
: TCG_COND_EQ
);
3142 TCGv_i64 rn
= read_cpu_reg(s
, a
->rn
, a
->sf
);
3143 TCGv_i64 rm
= read_cpu_reg(s
, a
->rm
, a
->sf
);
3144 TCGv_i64 cmp
= tcg_temp_new_i64();
3146 tcg_gen_setcond_i64(cond
, cmp
, rn
, rm
);
3147 tcg_gen_extrl_i64_i32(cpu_NF
, cmp
);
3148 tcg_temp_free_i64(cmp
);
3150 /* VF = !NF & !CF. */
3151 tcg_gen_xori_i32(cpu_VF
, cpu_NF
, 1);
3152 tcg_gen_andc_i32(cpu_VF
, cpu_VF
, cpu_CF
);
3154 /* Both NF and VF actually look at bit 31. */
3155 tcg_gen_neg_i32(cpu_NF
, cpu_NF
);
3156 tcg_gen_neg_i32(cpu_VF
, cpu_VF
);
3160 static bool trans_WHILE(DisasContext
*s
, arg_WHILE
*a
)
3162 TCGv_i64 op0
, op1
, t0
, t1
, tmax
;
3165 unsigned desc
, vsz
= vec_full_reg_size(s
);
3168 if (!sve_access_check(s
)) {
3172 op0
= read_cpu_reg(s
, a
->rn
, 1);
3173 op1
= read_cpu_reg(s
, a
->rm
, 1);
3177 tcg_gen_ext32u_i64(op0
, op0
);
3178 tcg_gen_ext32u_i64(op1
, op1
);
3180 tcg_gen_ext32s_i64(op0
, op0
);
3181 tcg_gen_ext32s_i64(op1
, op1
);
3185 /* For the helper, compress the different conditions into a computation
3186 * of how many iterations for which the condition is true.
3188 t0
= tcg_temp_new_i64();
3189 t1
= tcg_temp_new_i64();
3190 tcg_gen_sub_i64(t0
, op1
, op0
);
3192 tmax
= tcg_const_i64(vsz
>> a
->esz
);
3194 /* Equality means one more iteration. */
3195 tcg_gen_addi_i64(t0
, t0
, 1);
3197 /* If op1 is max (un)signed integer (and the only time the addition
3198 * above could overflow), then we produce an all-true predicate by
3199 * setting the count to the vector length. This is because the
3200 * pseudocode is described as an increment + compare loop, and the
3201 * max integer would always compare true.
3203 tcg_gen_movi_i64(t1
, (a
->sf
3204 ? (a
->u
? UINT64_MAX
: INT64_MAX
)
3205 : (a
->u
? UINT32_MAX
: INT32_MAX
)));
3206 tcg_gen_movcond_i64(TCG_COND_EQ
, t0
, op1
, t1
, tmax
, t0
);
3209 /* Bound to the maximum. */
3210 tcg_gen_umin_i64(t0
, t0
, tmax
);
3211 tcg_temp_free_i64(tmax
);
3213 /* Set the count to zero if the condition is false. */
3215 ? (a
->eq
? TCG_COND_LEU
: TCG_COND_LTU
)
3216 : (a
->eq
? TCG_COND_LE
: TCG_COND_LT
));
3217 tcg_gen_movi_i64(t1
, 0);
3218 tcg_gen_movcond_i64(cond
, t0
, op0
, op1
, t0
, t1
);
3219 tcg_temp_free_i64(t1
);
3221 /* Since we're bounded, pass as a 32-bit type. */
3222 t2
= tcg_temp_new_i32();
3223 tcg_gen_extrl_i64_i32(t2
, t0
);
3224 tcg_temp_free_i64(t0
);
3226 /* Scale elements to bits. */
3227 tcg_gen_shli_i32(t2
, t2
, a
->esz
);
3229 desc
= (vsz
/ 8) - 2;
3230 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
3231 t3
= tcg_const_i32(desc
);
3233 ptr
= tcg_temp_new_ptr();
3234 tcg_gen_addi_ptr(ptr
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
3236 gen_helper_sve_while(t2
, ptr
, t2
, t3
);
3239 tcg_temp_free_ptr(ptr
);
3240 tcg_temp_free_i32(t2
);
3241 tcg_temp_free_i32(t3
);
3246 *** SVE Integer Wide Immediate - Unpredicated Group
3249 static bool trans_FDUP(DisasContext
*s
, arg_FDUP
*a
)
3254 if (sve_access_check(s
)) {
3255 unsigned vsz
= vec_full_reg_size(s
);
3256 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3259 /* Decode the VFP immediate. */
3260 imm
= vfp_expand_imm(a
->esz
, a
->imm
);
3261 imm
= dup_const(a
->esz
, imm
);
3263 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, imm
);
3268 static bool trans_DUP_i(DisasContext
*s
, arg_DUP_i
*a
)
3270 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3273 if (sve_access_check(s
)) {
3274 unsigned vsz
= vec_full_reg_size(s
);
3275 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3277 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, dup_const(a
->esz
, a
->imm
));
3282 static bool trans_ADD_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3284 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3287 if (sve_access_check(s
)) {
3288 unsigned vsz
= vec_full_reg_size(s
);
3289 tcg_gen_gvec_addi(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3290 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3295 static bool trans_SUB_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3298 return trans_ADD_zzi(s
, a
);
3301 static bool trans_SUBR_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3303 static const GVecGen2s op
[4] = {
3304 { .fni8
= tcg_gen_vec_sub8_i64
,
3305 .fniv
= tcg_gen_sub_vec
,
3306 .fno
= gen_helper_sve_subri_b
,
3307 .opc
= INDEX_op_sub_vec
,
3309 .scalar_first
= true },
3310 { .fni8
= tcg_gen_vec_sub16_i64
,
3311 .fniv
= tcg_gen_sub_vec
,
3312 .fno
= gen_helper_sve_subri_h
,
3313 .opc
= INDEX_op_sub_vec
,
3315 .scalar_first
= true },
3316 { .fni4
= tcg_gen_sub_i32
,
3317 .fniv
= tcg_gen_sub_vec
,
3318 .fno
= gen_helper_sve_subri_s
,
3319 .opc
= INDEX_op_sub_vec
,
3321 .scalar_first
= true },
3322 { .fni8
= tcg_gen_sub_i64
,
3323 .fniv
= tcg_gen_sub_vec
,
3324 .fno
= gen_helper_sve_subri_d
,
3325 .opc
= INDEX_op_sub_vec
,
3326 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
3328 .scalar_first
= true }
3331 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3334 if (sve_access_check(s
)) {
3335 unsigned vsz
= vec_full_reg_size(s
);
3336 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3337 tcg_gen_gvec_2s(vec_full_reg_offset(s
, a
->rd
),
3338 vec_full_reg_offset(s
, a
->rn
),
3339 vsz
, vsz
, c
, &op
[a
->esz
]);
3340 tcg_temp_free_i64(c
);
3345 static bool trans_MUL_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3347 if (sve_access_check(s
)) {
3348 unsigned vsz
= vec_full_reg_size(s
);
3349 tcg_gen_gvec_muli(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3350 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3355 static bool do_zzi_sat(DisasContext
*s
, arg_rri_esz
*a
, bool u
, bool d
)
3357 if (a
->esz
== 0 && extract32(s
->insn
, 13, 1)) {
3360 if (sve_access_check(s
)) {
3361 TCGv_i64 val
= tcg_const_i64(a
->imm
);
3362 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, u
, d
);
3363 tcg_temp_free_i64(val
);
3368 static bool trans_SQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3370 return do_zzi_sat(s
, a
, false, false);
3373 static bool trans_UQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3375 return do_zzi_sat(s
, a
, true, false);
3378 static bool trans_SQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3380 return do_zzi_sat(s
, a
, false, true);
3383 static bool trans_UQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
)
3385 return do_zzi_sat(s
, a
, true, true);
3388 static bool do_zzi_ool(DisasContext
*s
, arg_rri_esz
*a
, gen_helper_gvec_2i
*fn
)
3390 if (sve_access_check(s
)) {
3391 unsigned vsz
= vec_full_reg_size(s
);
3392 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3394 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
3395 vec_full_reg_offset(s
, a
->rn
),
3396 c
, vsz
, vsz
, 0, fn
);
3397 tcg_temp_free_i64(c
);
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}
3419 static bool trans_DOT_zzz(DisasContext
*s
, arg_DOT_zzz
*a
)
3421 static gen_helper_gvec_3
* const fns
[2][2] = {
3422 { gen_helper_gvec_sdot_b
, gen_helper_gvec_sdot_h
},
3423 { gen_helper_gvec_udot_b
, gen_helper_gvec_udot_h
}
3426 if (sve_access_check(s
)) {
3427 unsigned vsz
= vec_full_reg_size(s
);
3428 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
3429 vec_full_reg_offset(s
, a
->rn
),
3430 vec_full_reg_offset(s
, a
->rm
),
3431 vsz
, vsz
, 0, fns
[a
->u
][a
->sz
]);
3436 static bool trans_DOT_zzx(DisasContext
*s
, arg_DOT_zzx
*a
)
3438 static gen_helper_gvec_3
* const fns
[2][2] = {
3439 { gen_helper_gvec_sdot_idx_b
, gen_helper_gvec_sdot_idx_h
},
3440 { gen_helper_gvec_udot_idx_b
, gen_helper_gvec_udot_idx_h
}
3443 if (sve_access_check(s
)) {
3444 unsigned vsz
= vec_full_reg_size(s
);
3445 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
3446 vec_full_reg_offset(s
, a
->rn
),
3447 vec_full_reg_offset(s
, a
->rm
),
3448 vsz
, vsz
, a
->index
, fns
[a
->u
][a
->sz
]);
3455 *** SVE Floating Point Multiply-Add Indexed Group
3458 static bool trans_FMLA_zzxz(DisasContext
*s
, arg_FMLA_zzxz
*a
)
3460 static gen_helper_gvec_4_ptr
* const fns
[3] = {
3461 gen_helper_gvec_fmla_idx_h
,
3462 gen_helper_gvec_fmla_idx_s
,
3463 gen_helper_gvec_fmla_idx_d
,
3466 if (sve_access_check(s
)) {
3467 unsigned vsz
= vec_full_reg_size(s
);
3468 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3469 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3470 vec_full_reg_offset(s
, a
->rn
),
3471 vec_full_reg_offset(s
, a
->rm
),
3472 vec_full_reg_offset(s
, a
->ra
),
3473 status
, vsz
, vsz
, (a
->index
<< 1) | a
->sub
,
3475 tcg_temp_free_ptr(status
);
3481 *** SVE Floating Point Multiply Indexed Group
3484 static bool trans_FMUL_zzx(DisasContext
*s
, arg_FMUL_zzx
*a
)
3486 static gen_helper_gvec_3_ptr
* const fns
[3] = {
3487 gen_helper_gvec_fmul_idx_h
,
3488 gen_helper_gvec_fmul_idx_s
,
3489 gen_helper_gvec_fmul_idx_d
,
3492 if (sve_access_check(s
)) {
3493 unsigned vsz
= vec_full_reg_size(s
);
3494 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3495 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3496 vec_full_reg_offset(s
, a
->rn
),
3497 vec_full_reg_offset(s
, a
->rm
),
3498 status
, vsz
, vsz
, a
->index
, fns
[a
->esz
- 1]);
3499 tcg_temp_free_ptr(status
);
3505 *** SVE Floating Point Fast Reduction Group
3508 typedef void gen_helper_fp_reduce(TCGv_i64
, TCGv_ptr
, TCGv_ptr
,
3509 TCGv_ptr
, TCGv_i32
);
3511 static void do_reduce(DisasContext
*s
, arg_rpr_esz
*a
,
3512 gen_helper_fp_reduce
*fn
)
3514 unsigned vsz
= vec_full_reg_size(s
);
3515 unsigned p2vsz
= pow2ceil(vsz
);
3516 TCGv_i32 t_desc
= tcg_const_i32(simd_desc(vsz
, p2vsz
, 0));
3517 TCGv_ptr t_zn
, t_pg
, status
;
3520 temp
= tcg_temp_new_i64();
3521 t_zn
= tcg_temp_new_ptr();
3522 t_pg
= tcg_temp_new_ptr();
3524 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
3525 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3526 status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3528 fn(temp
, t_zn
, t_pg
, status
, t_desc
);
3529 tcg_temp_free_ptr(t_zn
);
3530 tcg_temp_free_ptr(t_pg
);
3531 tcg_temp_free_ptr(status
);
3532 tcg_temp_free_i32(t_desc
);
3534 write_fp_dreg(s
, a
->rd
, temp
);
3535 tcg_temp_free_i64(temp
);
3538 #define DO_VPZ(NAME, name) \
3539 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3541 static gen_helper_fp_reduce * const fns[3] = { \
3542 gen_helper_sve_##name##_h, \
3543 gen_helper_sve_##name##_s, \
3544 gen_helper_sve_##name##_d, \
3546 if (a->esz == 0) { \
3549 if (sve_access_check(s)) { \
3550 do_reduce(s, a, fns[a->esz - 1]); \
3555 DO_VPZ(FADDV
, faddv
)
3556 DO_VPZ(FMINNMV
, fminnmv
)
3557 DO_VPZ(FMAXNMV
, fmaxnmv
)
3558 DO_VPZ(FMINV
, fminv
)
3559 DO_VPZ(FMAXV
, fmaxv
)
3562 *** SVE Floating Point Unary Operations - Unpredicated Group
3565 static void do_zz_fp(DisasContext
*s
, arg_rr_esz
*a
, gen_helper_gvec_2_ptr
*fn
)
3567 unsigned vsz
= vec_full_reg_size(s
);
3568 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3570 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s
, a
->rd
),
3571 vec_full_reg_offset(s
, a
->rn
),
3572 status
, vsz
, vsz
, 0, fn
);
3573 tcg_temp_free_ptr(status
);
3576 static bool trans_FRECPE(DisasContext
*s
, arg_rr_esz
*a
)
3578 static gen_helper_gvec_2_ptr
* const fns
[3] = {
3579 gen_helper_gvec_frecpe_h
,
3580 gen_helper_gvec_frecpe_s
,
3581 gen_helper_gvec_frecpe_d
,
3586 if (sve_access_check(s
)) {
3587 do_zz_fp(s
, a
, fns
[a
->esz
- 1]);
3592 static bool trans_FRSQRTE(DisasContext
*s
, arg_rr_esz
*a
)
3594 static gen_helper_gvec_2_ptr
* const fns
[3] = {
3595 gen_helper_gvec_frsqrte_h
,
3596 gen_helper_gvec_frsqrte_s
,
3597 gen_helper_gvec_frsqrte_d
,
3602 if (sve_access_check(s
)) {
3603 do_zz_fp(s
, a
, fns
[a
->esz
- 1]);
3609 *** SVE Floating Point Compare with Zero Group
3612 static void do_ppz_fp(DisasContext
*s
, arg_rpr_esz
*a
,
3613 gen_helper_gvec_3_ptr
*fn
)
3615 unsigned vsz
= vec_full_reg_size(s
);
3616 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3618 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s
, a
->rd
),
3619 vec_full_reg_offset(s
, a
->rn
),
3620 pred_full_reg_offset(s
, a
->pg
),
3621 status
, vsz
, vsz
, 0, fn
);
3622 tcg_temp_free_ptr(status
);
3625 #define DO_PPZ(NAME, name) \
3626 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3628 static gen_helper_gvec_3_ptr * const fns[3] = { \
3629 gen_helper_sve_##name##_h, \
3630 gen_helper_sve_##name##_s, \
3631 gen_helper_sve_##name##_d, \
3633 if (a->esz == 0) { \
3636 if (sve_access_check(s)) { \
3637 do_ppz_fp(s, a, fns[a->esz - 1]); \
3642 DO_PPZ(FCMGE_ppz0
, fcmge0
)
3643 DO_PPZ(FCMGT_ppz0
, fcmgt0
)
3644 DO_PPZ(FCMLE_ppz0
, fcmle0
)
3645 DO_PPZ(FCMLT_ppz0
, fcmlt0
)
3646 DO_PPZ(FCMEQ_ppz0
, fcmeq0
)
3647 DO_PPZ(FCMNE_ppz0
, fcmne0
)
3652 *** SVE floating-point trig multiply-add coefficient
3655 static bool trans_FTMAD(DisasContext
*s
, arg_FTMAD
*a
)
3657 static gen_helper_gvec_3_ptr
* const fns
[3] = {
3658 gen_helper_sve_ftmad_h
,
3659 gen_helper_sve_ftmad_s
,
3660 gen_helper_sve_ftmad_d
,
3666 if (sve_access_check(s
)) {
3667 unsigned vsz
= vec_full_reg_size(s
);
3668 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3669 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3670 vec_full_reg_offset(s
, a
->rn
),
3671 vec_full_reg_offset(s
, a
->rm
),
3672 status
, vsz
, vsz
, a
->imm
, fns
[a
->esz
- 1]);
3673 tcg_temp_free_ptr(status
);
3679 *** SVE Floating Point Accumulating Reduction Group
3682 static bool trans_FADDA(DisasContext
*s
, arg_rprr_esz
*a
)
3684 typedef void fadda_fn(TCGv_i64
, TCGv_i64
, TCGv_ptr
,
3685 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
3686 static fadda_fn
* const fns
[3] = {
3687 gen_helper_sve_fadda_h
,
3688 gen_helper_sve_fadda_s
,
3689 gen_helper_sve_fadda_d
,
3691 unsigned vsz
= vec_full_reg_size(s
);
3692 TCGv_ptr t_rm
, t_pg
, t_fpst
;
3699 if (!sve_access_check(s
)) {
3703 t_val
= load_esz(cpu_env
, vec_reg_offset(s
, a
->rn
, 0, a
->esz
), a
->esz
);
3704 t_rm
= tcg_temp_new_ptr();
3705 t_pg
= tcg_temp_new_ptr();
3706 tcg_gen_addi_ptr(t_rm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
3707 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3708 t_fpst
= get_fpstatus_ptr(a
->esz
== MO_16
);
3709 t_desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3711 fns
[a
->esz
- 1](t_val
, t_val
, t_rm
, t_pg
, t_fpst
, t_desc
);
3713 tcg_temp_free_i32(t_desc
);
3714 tcg_temp_free_ptr(t_fpst
);
3715 tcg_temp_free_ptr(t_pg
);
3716 tcg_temp_free_ptr(t_rm
);
3718 write_fp_dreg(s
, a
->rd
, t_val
);
3719 tcg_temp_free_i64(t_val
);
3724 *** SVE Floating Point Arithmetic - Unpredicated Group
3727 static bool do_zzz_fp(DisasContext
*s
, arg_rrr_esz
*a
,
3728 gen_helper_gvec_3_ptr
*fn
)
3733 if (sve_access_check(s
)) {
3734 unsigned vsz
= vec_full_reg_size(s
);
3735 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3736 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3737 vec_full_reg_offset(s
, a
->rn
),
3738 vec_full_reg_offset(s
, a
->rm
),
3739 status
, vsz
, vsz
, 0, fn
);
3740 tcg_temp_free_ptr(status
);
3746 #define DO_FP3(NAME, name) \
3747 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
3749 static gen_helper_gvec_3_ptr * const fns[4] = { \
3750 NULL, gen_helper_gvec_##name##_h, \
3751 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3753 return do_zzz_fp(s, a, fns[a->esz]); \
3756 DO_FP3(FADD_zzz
, fadd
)
3757 DO_FP3(FSUB_zzz
, fsub
)
3758 DO_FP3(FMUL_zzz
, fmul
)
3759 DO_FP3(FTSMUL
, ftsmul
)
3760 DO_FP3(FRECPS
, recps
)
3761 DO_FP3(FRSQRTS
, rsqrts
)
3766 *** SVE Floating Point Arithmetic - Predicated Group
3769 static bool do_zpzz_fp(DisasContext
*s
, arg_rprr_esz
*a
,
3770 gen_helper_gvec_4_ptr
*fn
)
3775 if (sve_access_check(s
)) {
3776 unsigned vsz
= vec_full_reg_size(s
);
3777 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3778 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3779 vec_full_reg_offset(s
, a
->rn
),
3780 vec_full_reg_offset(s
, a
->rm
),
3781 pred_full_reg_offset(s
, a
->pg
),
3782 status
, vsz
, vsz
, 0, fn
);
3783 tcg_temp_free_ptr(status
);
3788 #define DO_FP3(NAME, name) \
3789 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
3791 static gen_helper_gvec_4_ptr * const fns[4] = { \
3792 NULL, gen_helper_sve_##name##_h, \
3793 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3795 return do_zpzz_fp(s, a, fns[a->esz]); \
3798 DO_FP3(FADD_zpzz
, fadd
)
3799 DO_FP3(FSUB_zpzz
, fsub
)
3800 DO_FP3(FMUL_zpzz
, fmul
)
3801 DO_FP3(FMIN_zpzz
, fmin
)
3802 DO_FP3(FMAX_zpzz
, fmax
)
3803 DO_FP3(FMINNM_zpzz
, fminnum
)
3804 DO_FP3(FMAXNM_zpzz
, fmaxnum
)
3806 DO_FP3(FSCALE
, fscalbn
)
3808 DO_FP3(FMULX
, fmulx
)
3812 typedef void gen_helper_sve_fp2scalar(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
,
3813 TCGv_i64
, TCGv_ptr
, TCGv_i32
);
3815 static void do_fp_scalar(DisasContext
*s
, int zd
, int zn
, int pg
, bool is_fp16
,
3816 TCGv_i64 scalar
, gen_helper_sve_fp2scalar
*fn
)
3818 unsigned vsz
= vec_full_reg_size(s
);
3819 TCGv_ptr t_zd
, t_zn
, t_pg
, status
;
3822 t_zd
= tcg_temp_new_ptr();
3823 t_zn
= tcg_temp_new_ptr();
3824 t_pg
= tcg_temp_new_ptr();
3825 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, zd
));
3826 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, zn
));
3827 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
3829 status
= get_fpstatus_ptr(is_fp16
);
3830 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3831 fn(t_zd
, t_zn
, t_pg
, scalar
, status
, desc
);
3833 tcg_temp_free_i32(desc
);
3834 tcg_temp_free_ptr(status
);
3835 tcg_temp_free_ptr(t_pg
);
3836 tcg_temp_free_ptr(t_zn
);
3837 tcg_temp_free_ptr(t_zd
);
3840 static void do_fp_imm(DisasContext
*s
, arg_rpri_esz
*a
, uint64_t imm
,
3841 gen_helper_sve_fp2scalar
*fn
)
3843 TCGv_i64 temp
= tcg_const_i64(imm
);
3844 do_fp_scalar(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, temp
, fn
);
3845 tcg_temp_free_i64(temp
);
3848 #define DO_FP_IMM(NAME, name, const0, const1) \
3849 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
3851 static gen_helper_sve_fp2scalar * const fns[3] = { \
3852 gen_helper_sve_##name##_h, \
3853 gen_helper_sve_##name##_s, \
3854 gen_helper_sve_##name##_d \
3856 static uint64_t const val[3][2] = { \
3857 { float16_##const0, float16_##const1 }, \
3858 { float32_##const0, float32_##const1 }, \
3859 { float64_##const0, float64_##const1 }, \
3861 if (a->esz == 0) { \
3864 if (sve_access_check(s)) { \
3865 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3870 #define float16_two make_float16(0x4000)
3871 #define float32_two make_float32(0x40000000)
3872 #define float64_two make_float64(0x4000000000000000ULL)
3874 DO_FP_IMM(FADD
, fadds
, half
, one
)
3875 DO_FP_IMM(FSUB
, fsubs
, half
, one
)
3876 DO_FP_IMM(FMUL
, fmuls
, half
, two
)
3877 DO_FP_IMM(FSUBR
, fsubrs
, half
, one
)
3878 DO_FP_IMM(FMAXNM
, fmaxnms
, zero
, one
)
3879 DO_FP_IMM(FMINNM
, fminnms
, zero
, one
)
3880 DO_FP_IMM(FMAX
, fmaxs
, zero
, one
)
3881 DO_FP_IMM(FMIN
, fmins
, zero
, one
)
3885 static bool do_fp_cmp(DisasContext
*s
, arg_rprr_esz
*a
,
3886 gen_helper_gvec_4_ptr
*fn
)
3891 if (sve_access_check(s
)) {
3892 unsigned vsz
= vec_full_reg_size(s
);
3893 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3894 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s
, a
->rd
),
3895 vec_full_reg_offset(s
, a
->rn
),
3896 vec_full_reg_offset(s
, a
->rm
),
3897 pred_full_reg_offset(s
, a
->pg
),
3898 status
, vsz
, vsz
, 0, fn
);
3899 tcg_temp_free_ptr(status
);
3904 #define DO_FPCMP(NAME, name) \
3905 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
3907 static gen_helper_gvec_4_ptr * const fns[4] = { \
3908 NULL, gen_helper_sve_##name##_h, \
3909 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3911 return do_fp_cmp(s, a, fns[a->esz]); \
3914 DO_FPCMP(FCMGE
, fcmge
)
3915 DO_FPCMP(FCMGT
, fcmgt
)
3916 DO_FPCMP(FCMEQ
, fcmeq
)
3917 DO_FPCMP(FCMNE
, fcmne
)
3918 DO_FPCMP(FCMUO
, fcmuo
)
3919 DO_FPCMP(FACGE
, facge
)
3920 DO_FPCMP(FACGT
, facgt
)
/* FCADD: floating-point complex add with rotate.  The rotation (90 or
 * 270 degrees) is passed to the helper via the simd data field.
 * NOTE(review): the esz == 0 guard and trailing return were
 * reconstructed from dropped source lines.
 */
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        /* Byte element size is unallocated for FP complex add.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* Signature of the out-of-line FMLA helpers: they receive only env, the
 * predicate pointer, and a descriptor that encodes all register numbers.
 */
typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);

/* Common expansion for predicated FP multiply-add: Zd = fn(Zn, Zm, Za)/Pg.
 * NOTE(review): the fn==NULL guard and returns were reconstructed from
 * dropped source lines.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned desc;
    TCGv_i32 t_desc;
    TCGv_ptr pg = tcg_temp_new_ptr();

    /* We would need 7 operands to pass these arguments "properly".
     * So we encode all the register numbers into the descriptor.
     */
    desc = deposit32(a->rd, 5, 5, a->rn);
    desc = deposit32(desc, 10, 5, a->rm);
    desc = deposit32(desc, 15, 5, a->ra);
    desc = simd_desc(vsz, vsz, desc);

    t_desc = tcg_const_i32(desc);
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(cpu_env, pg, t_desc);
    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(pg);
    return true;
}
/* Emit a trans_<NAME> function dispatching to the per-size sve_<name>
 * helpers; esz == 0 is unallocated, hence the NULL slot.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_sve_fmla * const fns[4] = {                    \
        NULL, gen_helper_sve_##name##_h,                             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
    };                                                               \
    return do_fmla(s, a, fns[a->esz]);                               \
}
/* Predicated FP fused multiply-add family (plain/negated variants).  */
DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
/* FCMLA (vectors, predicated): FP complex multiply-add with rotate.
 * As with do_fmla, all register numbers plus the 2-bit rotation are
 * packed into the descriptor; sextract32 keeps only the 22 used bits
 * so the value fits in the simd_desc data field.
 * NOTE(review): the esz == 0 guard and final return were reconstructed
 * from dropped source lines.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor.
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}
/* FCMLA (indexed): complex multiply-add with one operand selected by
 * index.  Reuses the AdvSIMD indexed-fcmla gvec helpers; the index and
 * rotation share the data field (index * 4 + rot).
 * NOTE(review): the status/vsz argument lines and final return were
 * reconstructed from dropped source lines -- confirm against upstream.
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    /* Only H and S element sizes are encodable; Zd must equal Za.  */
    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4057 *** SVE Floating Point Unary Operations Predicated Group
/* Common expansion for predicated FP unary ops: Zd = fn(Zn)/Pg, with an
 * fp_status pointer (FP16 variant when is_fp16).
 * NOTE(review): the trailing return was reconstructed from dropped
 * source lines.
 */
static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(is_fp16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* FCVT: predicated FP precision conversions.  The suffix names the
 * source/destination format pair (s=single, h=half, d=double); all use
 * the full (non-FP16) status flags since at least one side is >= 32-bit.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
/* FCVTZS/FCVTZU: predicated FP to signed/unsigned integer conversion,
 * round toward zero.  Suffix is source-format/dest-size.  The h* forms
 * read half-precision inputs and so use the FP16 status flags (true);
 * all other forms use the normal flags (false).
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
/* Round-to-integral helpers indexed by esz - 1; shared by FRINTI and
 * the directed-rounding forms via do_frint_mode.
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral using the current FPCR rounding mode.
 * NOTE(review): the esz == 0 guard was reconstructed from dropped
 * source lines.
 */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
/* FRINTX: round to integral, raising Inexact when the value changes.
 * NOTE(review): the esz == 0 guard was reconstructed from dropped
 * source lines.
 */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      fns[a->esz - 1]);
}
/* Common expansion for the directed-rounding FRINT* forms: temporarily
 * install MODE as the rounding mode, run the frint helper, then restore.
 * gen_helper_set_rmode swaps the old mode into tmode, so calling it a
 * second time with the same operands restores the original mode.
 * NOTE(review): the esz == 0 guard and final return were reconstructed
 * from dropped source lines.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        /* Restore the previous rounding mode saved in tmode.  */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* Directed-rounding round-to-integral: N = to nearest (ties even),
 * P = toward +inf, M = toward -inf, Z = toward zero, A = ties away.
 */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_up);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_down);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
/* FRECPX: floating-point reciprocal exponent (predicated).
 * NOTE(review): the esz == 0 guard was reconstructed from dropped
 * source lines.
 */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      fns[a->esz - 1]);
}
/* FSQRT: floating-point square root (predicated).
 * NOTE(review): the esz == 0 guard was reconstructed from dropped
 * source lines.
 */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      fns[a->esz - 1]);
}
/* SCVTF/UCVTF: predicated signed/unsigned integer to FP conversion.
 * Suffix is source-int-size/dest-float-format.  The *h forms produce
 * half-precision results and use the FP16 status flags (true).
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4349 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 *
 * LEN is a multiple of 2 (predicate loads) or 16 (vector loads); the
 * bulk is moved 8 bytes at a time, either fully unrolled (few parts)
 * or via a generated TCG loop, with a 2/4/6-byte tail for predicates.
 * NOTE(review): the nparts<=4 if/else framing and the switch case
 * labels were reconstructed from dropped source lines.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte chunks plus one chunk per set bit of the tail.  */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 to the matching MO_SIZE.  */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte load plus a 2-byte load merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
/* Similarly for stores: write LEN bytes starting at cpu_env offset VOFS
 * to address Rn + IMM.  Mirrors do_ldr's unroll-vs-loop strategy.
 * NOTE(review): the nparts<=4 if/else framing, the i/addr/t0 local
 * declarations and the switch case labels were reconstructed from
 * dropped source lines.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: 4-byte store, then the high half as 2 bytes.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
/* LDR/STR (vector or predicate register, scalar plus immediate):
 * unpredicated whole-register transfers.  The immediate is scaled by
 * the current register size in bytes.
 */
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
4563 *** SVE Memory - Contiguous Load Group
/* The memory mode of the dtype.  Indexed by the 4-bit dtype field of the
 * contiguous-load encodings; rows give the in-memory access size and
 * signedness for each dtype value.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* The in-memory access size (log2 bytes) of the dtype.  */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
/* The vector element size of dtype.
 * NOTE(review): all 16 table entries were dropped from this extract and
 * have been reconstructed from upstream QEMU -- confirm before relying
 * on them.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 3, 3, 3
};
/* Build the TCGMemOpIdx (memop + mmu index) for a contiguous access of
 * the given dtype, honoring the current CPU data endianness.
 */
static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
{
    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
}
/* Common expansion for predicated contiguous loads/stores: invoke the
 * out-of-line helper FN with env, predicate pointer, scalar address,
 * and a descriptor carrying memopidx plus the Zt register number.
 * NOTE(review): the t_pg/t_desc/desc local declarations were
 * reconstructed from dropped source lines.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc;

    /* For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    desc = sve_memopidx(s, dtype);
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
/* Dispatch a predicated contiguous load to the proper helper, selected
 * by endianness, dtype, and register count (LD1..LD4).  NULL slots are
 * dtype/nreg combinations with no instruction encoding.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
/* LD1..LD4 (scalar plus scalar): address = Rn + (Rm << msz).
 * NOTE(review): the rm == 31 reject and final return were reconstructed
 * from dropped source lines (XZR is not a valid offset register here).
 */
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
/* LD1..LD4 (scalar plus immediate): the immediate counts whole vectors,
 * so it is scaled by elements-per-vector, register count, and access size.
 */
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
/* LDFF1 (scalar plus scalar): first-fault contiguous load.  Helper is
 * selected by endianness and dtype; a fault past the first active
 * element sets FFR bits instead of trapping.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_le_r,
          gen_helper_sve_ldff1hh_le_r,
          gen_helper_sve_ldff1hsu_le_r,
          gen_helper_sve_ldff1hdu_le_r,

          gen_helper_sve_ldff1hds_le_r,
          gen_helper_sve_ldff1hss_le_r,
          gen_helper_sve_ldff1ss_le_r,
          gen_helper_sve_ldff1sdu_le_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_be_r,
          gen_helper_sve_ldff1hh_be_r,
          gen_helper_sve_ldff1hsu_be_r,
          gen_helper_sve_ldff1hdu_be_r,

          gen_helper_sve_ldff1hds_be_r,
          gen_helper_sve_ldff1hss_be_r,
          gen_helper_sve_ldff1ss_be_r,
          gen_helper_sve_ldff1sdu_be_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_be_r },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
/* LDNF1 (scalar plus immediate): non-fault contiguous load.  Any fault
 * is suppressed and recorded in FFR rather than taken.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_le_r,
          gen_helper_sve_ldnf1hh_le_r,
          gen_helper_sve_ldnf1hsu_le_r,
          gen_helper_sve_ldnf1hdu_le_r,

          gen_helper_sve_ldnf1hds_le_r,
          gen_helper_sve_ldnf1hss_le_r,
          gen_helper_sve_ldnf1ss_le_r,
          gen_helper_sve_ldnf1sdu_le_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_be_r,
          gen_helper_sve_ldnf1hh_be_r,
          gen_helper_sve_ldnf1hsu_be_r,
          gen_helper_sve_ldnf1hdu_be_r,

          gen_helper_sve_ldnf1hds_be_r,
          gen_helper_sve_ldnf1hss_be_r,
          gen_helper_sve_ldnf1ss_be_r,
          gen_helper_sve_ldnf1sdu_be_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_be_r },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
/* Expansion for LD1RQ: load one 16-byte quadword under predicate, then
 * replicate it across the whole vector register.
 * NOTE(review): the local declarations, vsz > 16 conditions, and the
 * poff += 6 big-endian adjustment were reconstructed from dropped
 * source lines.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(16, 16, desc);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
/* LD1RQ (scalar plus scalar): address = Rn + (Rm << msz).
 * NOTE(review): the rm == 31 reject and final return were reconstructed
 * from dropped source lines.
 */
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, msz);
    }
    return true;
}
/* LD1RQ (scalar plus immediate): the immediate counts 16-byte quadwords.  */
static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
    }
    return true;
}
/* Load and broadcast element (LD1R): load one element and replicate it
 * to every active element of Zd; inactive elements are zeroed.  If no
 * predicate bit is set the load is skipped entirely (branch to OVER).
 * NOTE(review): the psz <= 8 if/else framing, the temp declaration, and
 * the dup arguments were reconstructed from dropped source lines.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
/* Dispatch a predicated contiguous store: fn_single is indexed by
 * [be][msz][esz] for ST1, fn_multiple by [be][nreg-1][msz] for ST2-4.
 * NOTE(review): the NULL row prefixes for msz > esz slots and the
 * nreg == 0 dispatch framing were reconstructed from dropped source
 * lines -- confirm against upstream.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_le_r,
            gen_helper_sve_st1hs_le_r,
            gen_helper_sve_st1hd_le_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_le_r,
            gen_helper_sve_st1sd_le_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_le_r } },
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_be_r,
            gen_helper_sve_st1hs_be_r,
            gen_helper_sve_st1hd_be_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_be_r,
            gen_helper_sve_st1sd_be_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_be_r } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_le_r,
            gen_helper_sve_st2ss_le_r,
            gen_helper_sve_st2dd_le_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_le_r,
            gen_helper_sve_st3ss_le_r,
            gen_helper_sve_st3dd_le_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_le_r,
            gen_helper_sve_st4ss_le_r,
            gen_helper_sve_st4dd_le_r } },
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_be_r,
            gen_helper_sve_st2ss_be_r,
            gen_helper_sve_st2dd_be_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_be_r,
            gen_helper_sve_st3ss_be_r,
            gen_helper_sve_st3dd_be_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_be_r,
            gen_helper_sve_st4ss_be_r,
            gen_helper_sve_st4dd_be_r } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[be][msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
}
/* ST1..ST4 (scalar plus scalar): Rm == XZR and a memory size wider than
 * the element size are both unallocated encodings.
 */
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}
/* ST1..ST4 (scalar plus immediate): the immediate counts whole vectors,
 * scaled by elements-per-vector, register count, and access size.
 */
static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}
5052 *** SVE gather loads / scatter stores
/* Common expansion for gather loads / scatter stores: invoke helper FN
 * with env, Zt, Pg, the vector of offsets Zm, the scalar base, and a
 * descriptor carrying memopidx plus the index-scaling shift.
 * NOTE(review): the t_desc/desc local declarations were reconstructed
 * from dropped source lines.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= scale << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
/* Indexed by [be][ff][xs][u][msz].
 * be: big-endian; ff: first-fault; xs: unsigned vs signed 32-bit offsets;
 * u: zero- vs sign-extend the loaded value; msz: memory element size.
 * NULL marks [u=0][msz=2]: a sign-extending 32-bit load into a 32-bit
 * element is the same as the unsigned form, so no helper exists.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset.
 * Otherwise indexed as gather_load_fn32: [be][ff][xs][u][msz].
 * NULL marks [u=0][msz=3]: a sign-extending 64-bit load into a 64-bit
 * element is the same as the unsigned form, so no helper exists.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
/* LD1 (scalar plus vector): gather load from [rn + zm.elt << scale].  */
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Select the helper by vector element size; all other parameters
     * (endianness, first-fault, offset form, extension, memory size)
     * index the tables above.
     */
    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* When scaling is enabled, the shift amount is log2 of the
     * memory element size.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}
/* LD1 (vector plus immediate): gather load from [zn.elt + imm].  */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    /* The memory element may not be wider than the vector element,
     * and an equal-width load must be the unsigned (zero-extend) form.
     */
    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* The vector itself supplies the addresses: use the unsigned
     * same-width offset form (xs=0 for 32-bit, xs=2 for 64-bit).
     */
    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}
/* Indexed by [be][xs][msz].
 * be: big-endian; xs: unsigned vs signed 32-bit offsets;
 * msz: memory element size (byte stores need no endian variant).
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
/* Note that we overload xs=2 to indicate 64-bit offset.
 * Otherwise indexed as scatter_store_fn32: [be][xs][msz].
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
/* ST1 (scalar plus vector): scatter store to [rn + zm.elt << scale].  */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    int be = s->be_data == MO_BE;

    /* The memory element may not be wider than the vector element,
     * and scaling a byte-sized element is not permitted.
     */
    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    /* When scaling is enabled, the shift amount is log2 of the
     * memory element size.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}
/* ST1 (vector plus immediate): scatter store to [zn.elt + imm].  */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    /* The memory element may not be wider than the vector element.  */
    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* The vector itself supplies the addresses: use the unsigned
     * same-width offset form (xs=0 for 32-bit, xs=2 for 64-bit).
     */
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}
/* PRF (scalar/immediate forms): SVE prefetch.  */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU, but we still perform the
     * SVE access check for its trap side effects.
     */
    (void)sve_access_check(s);
    return true;
}
/* PRF (scalar plus scalar): rm == 31 (XZR) is not a valid offset.  */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU, but we still perform the
     * SVE access check for its trap side effects.
     */
    (void)sve_access_check(s);
    return true;
}
5434 * TODO: The implementation so far could handle predicated merging movprfx.
5435 * The helper functions as written take an extra source register to
5436 * use in the operation, but the result is only written when predication
5437 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5438 * to allow the final write back to the destination to be unconditional.
5439 * For predicated zeroing movprfx, we need to rearrange the helpers to
5440 * allow the final write back to zero inactives.
5442 * In the meantime, just emit the moves.
/* MOVPRFX (unpredicated): emitted as a plain vector move; see the
 * comment above about the eventual fused implementation.
 */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
/* MOVPRFX (predicated, merging): take active elements from rn,
 * keep the existing rd values for inactive elements.
 */
static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}
5458 static bool trans_MOVPRFX_z(DisasContext
*s
, arg_rpr_esz
*a
)
5460 if (sve_access_check(s
)) {
5461 do_movz_zpz(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
);