/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "trace-tcg.h"
#include "translate-a64.h"
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated).  */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}
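/* A worked example of the tsz:imm3 encoding handled above, for the byte
 * element size: tsz == 0b0001, so the combined field x is 8..15.
 * tszimm_esz yields 31 - clz32(1) = 0; tszimm_shr yields (16 << 0) - x,
 * i.e. right shifts of 1..8; tszimm_shl yields x - (8 << 0), i.e. left
 * shifts of 0..7.  A tsz of zero makes clz32 return 32, so tszimm_esz
 * returns -1 and the encoding is diagnosed as unallocated.
 */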
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"
/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
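/* For a concrete sense of the sizes involved: with a 256-bit vector
 * length, a Zreg occupies 32 bytes, pred_full_reg_size gives 32 / 8 = 4
 * bytes, and pred_gvec_reg_size rounds that up to the 8-byte minimum
 * operand size supported by the tcg vector infrastructure.
 */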
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
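/* This layout is the contract with the predtest helpers below: the
 * returned word carries N in its sign bit, bit 1 becomes cpu_ZF (QEMU
 * holds Z inverted, so Z is set only when that bit is clear), bit 0
 * becomes C, and V is always clear.
 */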
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
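/* Each predicate bit governs one byte of vector data, so an element of
 * size 1 << esz owns 1 << esz consecutive predicate bits, of which only
 * the lowest is significant.  Hence the masks above: e.g. for esz == 2
 * (word elements) every fourth bit is active, giving 0x1111...1111.
 */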
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
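/* For instance, DO_ZPZZ(SMULH, smulh) below expands to a
 * trans_SMULH_zpzz function that dispatches on a->esz to
 * gen_helper_sve_smulh_zpzz_{b,h,s,d}.
 */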
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(NOT_zpz, not_zpz)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_reduc * const fns[4] = {                 \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_vpz_ool(s, a, fns[a->esz]);                           \
}

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz.  */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less.  */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
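/* Concretely: an unpredicated LSR .H (esz == 1) with imm == 16 takes
 * the zeroing path above, while ASR .H with imm == 16 is clamped to a
 * shift of 15, matching the architectural behaviour of a shift by the
 * element size.
 */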
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}
/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
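/* The psz == 8 fast path above applies to vector lengths up to 512
 * bits, where the whole predicate fits in a single 64-bit load and both
 * the operation and the PredTest can be done on i64 temps rather than
 * via a full gvec expansion.
 */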
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
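/* For example, with a 256-bit vector and esz == 2 there are 8 word
 * elements: POW2 and VL8 both yield 8, but VL16 asks for more elements
 * than exist and so yields 0, as does any unallocated #uimm5 pattern.
 */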
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
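/* As an example of the fast paths above: PTRUE P0.D, ALL at a 2048-bit
 * vector length gives numelem = 32 and setsz = 256 bits, with word ==
 * lastword == 0x0101010101010101ull, so the entire 32-byte predicate
 * is written by a single dup64i expansion.
 */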
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
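/* E.g. for an unsigned decrement: the register is zero-extended, the
 * (positive) value is subtracted with full 64-bit precision, and the
 * movcond clamps any result below 0 back to 0; the unsigned increment
 * case instead clamps against UINT32_MAX with TCG_COND_GT.
 */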
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
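/* The signed cases use the usual two's-complement identities: the
 * subtraction overflows iff the operands differ in sign and the result
 * differs in sign from the minuend, i.e. (reg ^ val) & (reg ^ (reg - val))
 * is negative; the addition overflows iff the operands agree in sign and
 * the result differs, i.e. ((reg + val) ^ val) & ~(reg ^ val) is negative.
 */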
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
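/* Note the trick used for the narrow cases above: rather than needing
 * separate saturating-subtract helpers, a decrement simply negates the
 * (positive) value and hands it to the saturating-add helper, which
 * computes the width-limited result internally.
 */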
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
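/* For example, at vsz == 32 an EXT with imm == 16 satisfies every
 * condition above (both 16-byte pieces are gvec-aligned) and is emitted
 * as at most two host vector moves, whereas imm == 12 fails the
 * size_for_gvec tests and falls back to the out-of-line helper.
 */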
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
2290 *** SVE Permute Vector - Predicated Group
2293 static bool trans_COMPACT(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2295 static gen_helper_gvec_3
* const fns
[4] = {
2296 NULL
, NULL
, gen_helper_sve_compact_s
, gen_helper_sve_compact_d
2298 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
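
/* For example, with esz == 2 a last active element at index 3 yields
 * 3 << 2 == 12; if no element is active the helper returns -4, so a
 * simple sign test distinguishes the not-found case.
 */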
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
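
/* E.g. for esz == 3 and a 48-byte (384-bit) vector, an offset of 40
 * increments to 48, which the movcond wraps back to 0; when the vector
 * size is a power of two the same wrap is just the mask above.
 */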
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
/* Load an unsigned element of ESZ from RM[LAST].  */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
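
/* On big-endian hosts the XOR above re-maps the byte offset within each
 * 64-bit unit; e.g. for esz == 1 (halfwords) an offset of 2 becomes
 * 2 ^ 6 == 4, mirroring the layout described at vec_reg_offset.
 */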
/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
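
/* In sum: CLASTB broadcasts the last active element of Zm to every
 * element of Zd, and CLASTA the element following it; when no element
 * is active, Zd is simply a copy of Zn (the MOVPRFX source).
 */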
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, true);
}
/* Compute CLAST for a scalar.  */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
/* Compute CLAST for a Vreg.  */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, true);
}
/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
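
/* For the X-register forms, the previous value of Xdn supplies the
 * result when no element is active, which is why it is zero-extended
 * to the element width before do_clast_scalar's conditional move.
 */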
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, true);
}
/* Compute LAST for a scalar.  */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
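
/* Unlike CLAST, LAST is defined even when no element is active: LASTB
 * (before) wraps the negative not-found offset to the last element of
 * the vector, and LASTA (after) wraps it to element 0, so no
 * conditional move is needed here.
 */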
/* Compute LAST for a Vreg.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, true);
}
/* Compute LAST for a Xreg.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, true);
}
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}
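
/* SPLICE copies the first-active through last-active elements of Zn to
 * the low part of Zd and fills the remainder with the leading elements
 * of Zm; esz rides along as the descriptor data for the helper.
 */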
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
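
/* E.g. for a 6-byte predicate register (VL = 384 bits): len_align == 0,
 * so everything is handled by the remainder path, which issues a 4-byte
 * and a 2-byte little-endian load and merges them with a deposit.
 */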
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);