/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated).  */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

static inline int plus1(int x)
{
    return x + 1;
}
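/* For example: for a shift on 16-bit elements, tsz must be 001x, so the
 * combined {tsz:imm3} value x lies in [16, 31].  Then:
 *   tszimm_esz(x) = 31 - clz32(x >> 3) = 1
 *   tszimm_shr(x) = (16 << 1) - x = 32 - x    (shift right by 1..16)
 *   tszimm_shl(x) = x - (8 << 1)  = x - 16    (shift left by 0..15)
 */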
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"
/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
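/* For example, at the architectural minimum VL of 128 bits the predicate
 * registers are only 2 bytes, which is rounded up to the 8-byte minimum
 * that the tcg vector infrastructure accepts; at the 2048-bit maximum
 * they are 32 bytes and are used as-is.
 */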
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
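/* The GVecGen2Fn/GVecGen3Fn expanders follow the generic tcg gvec
 * convention: (vector element size, destination offset, source offset(s),
 * operation size, maximum size).  When the operation size is smaller than
 * the maximum, the expansion zeroes the bytes in between; here both are
 * always the full vector size.
 */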
/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
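/* The sve_predtest helpers pack the NZCV result to match the assignments
 * above: N in bit 31 (copied straight into NF), a bit 1 that is nonzero
 * exactly when some active element is true (so ZF == 0 means Z is set),
 * and C in bit 0; V is always zero for PredTest.
 */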
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
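/* One predicate bit governs each byte of vector data, so an element of
 * size (1 << esz) bytes is controlled by the low bit of its group:
 * e.g. for 32-bit elements (esz == 2) every fourth bit is significant,
 * giving the 0x1111... mask.
 */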
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
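/* The "_ool" (out-of-line) expanders call a C helper instead of emitting
 * inline tcg ops: the helper receives host pointers to Zd, Zn, Zm and Pg
 * plus a simd_desc()-encoded descriptor carrying the vector length.
 */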
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    return do_zpz_ool(s, a, fns[a->esz]);                                 \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                         \
    static gen_helper_gvec_reduc * const fns[4] = {                       \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    return do_vpz_ool(s, a, fns[a->esz]);                                 \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
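/* The reduction helpers return the scalar result in a TCGv_i64;
 * write_fp_dreg stores it to the low 64 bits of the destination and
 * clears the remainder of the register, as for an AdvSIMD scalar write.
 */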
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
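/* Note that ASR by the full element size is a valid encoding and is
 * clamped to one less (an identical result, since the sign bit simply
 * replicates), while LSR by the element size zeroes every active
 * element, which is why those paths divert to do_clr_zp.
 */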
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                         \
    static gen_helper_gvec_5 * const fns[4] = {                           \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    return do_zpzzz_ool(s, a, fns[a->esz]);                               \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
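/* INDEX writes Zd.e[i] = start + i * incr for each element, e.g.
 * INDEX Z0.S, #0, #4 yields 0, 4, 8, ... across the whole vector.
 */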
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}
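/* For example, at VL = 256 bits: ADDVL X0, X1, #3 adds 3 * 32 = 96 to X1,
 * while ADDPL X0, X1, #3 adds 3 * 4 = 12, a predicate register being one
 * eighth the size of a vector register.
 */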
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
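/* psz == 8 corresponds to the smaller vector lengths (VL <= 512 bits),
 * where the whole predicate fits in a single 64-bit word and both the
 * operation and the PredTest can be done with scalar i64 ops.
 */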
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    }
    return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
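/* All of the flag-setting forms above funnel through do_pppp_flags, and
 * every one of these operations zeroes the inactive bits of Pd, matching
 * the zeroing predication that SVE specifies for predicate results.
 */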
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
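/* For example, with VL = 256 bits and esz = MO_8 there are 32 elements:
 * POW2 -> 32, VL16 -> 16, MUL3 -> 30, ALL -> 32; a fixed pattern such as
 * VL64 that exceeds the actual element count yields 0.
 */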
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
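/* The movcond implements the clamp: after e.g. an unsigned addition,
 * cond is GT and bound is UINT32_MAX, so reg = (reg > bound ? bound : reg);
 * the widened 64-bit intermediate is what makes the out-of-range case
 * detectable with a single comparison.
 */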
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
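/* The signed cases use the standard two's-complement identities:
 * subtraction overflows iff ((a ^ b) & (a ^ (a - b))) < 0, while
 * addition overflows iff (~(a ^ b) & (a ^ (a + b))) < 0; the sign of
 * t0 then selects between the saturated bound and the raw result.
 */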
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
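/* For example, at VL = 256 bits, CNTH X0 (pattern ALL, MUL #1) sets X0
 * to 16, and DECW Z0.S subtracts 8 from every 32-bit element; the SQ/UQ
 * forms saturate on overflow instead of wrapping, via the helpers above.
 */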
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
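/* A remainder of 6 bytes (possible only for the predicate registers,
 * whose size is any multiple of 2) has no single memory operation, so it
 * is assembled from a 4-byte and a 2-byte load merged with a deposit.
 */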
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}