/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}
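
/*
 * A worked decoding, with illustrative numbers not taken from this file:
 * for a halfword shift the combined tsz:imm3 field might be
 * x = 0b0011010 (26); then x >> 3 = 3 and 31 - clz32(3) = 1, so
 * esz = MO_16.  tszimm_shr gives (16 << 1) - 26 = 6 (a right shift by 6,
 * counted down from the element size), and tszimm_shl gives
 * 26 - (8 << 1) = 10 (a left shift counted up from zero).
 */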

static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
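
/*
 * For illustration: with x = 0x180, the low byte is 0x80 and SH is set,
 * so the signed form yields (int8_t)0x80 << 8 = -32768 while the
 * unsigned form yields 0x80 << 8 = 0x8000.  With SH clear (x = 0x080)
 * no shift is applied and the byte is used as-is.
 */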

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
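
/*
 * Reading of the table above (stated for orientation, not taken from
 * this file): entries 0, 5, 10, 15 are the dtype encodings whose element
 * size equals the memory size, i.e. the unsigned same-size loads
 * LD1B, LD1H, LD1W and LD1D respectively.
 */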

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
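
/*
 * For example (numbers chosen for illustration): with a 512-bit vector
 * length, s->sve_len is 64 bytes, so the predicate register is
 * 64 / 8 = 8 bytes -- one predicate bit per vector byte.
 */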

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
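
/*
 * E.g. a 2-byte predicate (128-bit VL) widens to the 8-byte gvec
 * minimum, while a 34-byte predicate (a hypothetical 2176-bit VL)
 * would round up to 48 bytes.
 */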

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
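
/*
 * Reading of the packing above (an interpretation; c.f. the predtest
 * helpers in sve_helper.c): the helper result carries N in bit 31, a
 * "not all zero" indication in bit 1 (QEMU's cpu_ZF is zero exactly
 * when the Z flag is set), and C in bit 0; V is always clear.
 */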

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
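
/*
 * E.g. for halfword elements (esz = 1) each element covers two predicate
 * bits, of which only the low one is significant, hence the 0x5555...
 * pattern; for doublewords only every eighth bit matters, hence 0x0101....
 */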

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements.  */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)              \
{                                                                        \
    static gen_helper_gvec_5 * const fns[4] = {                          \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_zpzzz_ool(s, a, fns[a->esz]);                              \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
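
/*
 * The non-flag-setting paths below fold identities in pd = pn & pm & pg
 * (noted here for clarity): with pn == pm the expression reduces to
 * pn & pg, and if additionally pg == pn it is just a move of pn; when
 * pg equals either operand, the extra AND with the guard is redundant.
 */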

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
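
/*
 * Worked example (illustrative numbers): with a 256-bit vector,
 * fullsz = 32 bytes; for word elements (esz = 2) there are 8 elements.
 * POW2 gives 8, VL7 gives 7 (since 8 >= 7), MUL3 gives 6, ALL gives 8,
 * and VL16 gives 0 because the bound of 16 exceeds the element count.
 */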

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
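
/*
 * Illustrative case: PTRUE Pd.S, VL7 at a 256-bit vector length gives
 * fullsz = 32 predicate bits and numelem = 7, so setsz = 28 bits; word
 * is the esz = 2 mask 0x1111... and lastword masks it down to bits
 * 0..27, which is what the fullsz <= 64 path stores directly.
 */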

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
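
/*
 * A note on the 64-bit variant below: it cannot widen, so the unsigned
 * paths saturate from a single unsigned comparison, while the signed
 * paths use the usual XOR identity: for a + b, signed overflow occurred
 * iff the sign bit of (res ^ b) & ~(a ^ b) is set; for a - b, iff the
 * sign bit of (a ^ b) & (a ^ res) is set.  The movcond then substitutes
 * the saturated bound.
 */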

/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
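
/*
 * For orientation (an illustrative case, not from the source): with
 * vsz = 32 and imm = 8, the destination receives Zn bytes 8..31
 * followed by Zm bytes 0..7 -- i.e. bytes imm onward of the
 * concatenation Zn:Zm.
 */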

/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
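
/*
 * Decoding sketch for the immediate above (illustrative values): the
 * position of the lowest set bit selects the element size and the
 * remaining high bits are the element index, so imm = 0b00110 gives
 * esz = 1 (halfwords) with index = 1, while imm = 0b00100 gives
 * esz = 2 (words) with index = 0.
 */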

static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2228 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2230 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2233 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2235 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2238 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2240 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2243 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2245 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2248 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2250 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2253 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2255 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2258 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2260 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2263 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2265 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2268 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2270 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2274 /* *** SVE Permute - Interleaving Group */
2277 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2279 static gen_helper_gvec_3 * const fns[4] = {
2280 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2284 if (sve_access_check(s)) {
2285 unsigned vsz = vec_full_reg_size(s);
2286 unsigned high_ofs = high ? vsz / 2 : 0;
2287 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288 vec_full_reg_offset(s, a->rn) + high_ofs,
2289 vec_full_reg_offset(s, a->rm) + high_ofs,
2290 vsz, vsz, 0, fns[a->esz]);
2292 return true;
2295 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296 gen_helper_gvec_3 *fn)
2298 if (sve_access_check(s)) {
2299 unsigned vsz = vec_full_reg_size(s);
2300 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301 vec_full_reg_offset(s, a->rn),
2302 vec_full_reg_offset(s, a->rm),
2303 vsz, vsz, data, fn);
2305 return true;
2308 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2310 return do_zip(s, a, false);
2313 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2315 return do_zip(s, a, true);
2318 static gen_helper_gvec_3 * const uzp_fns[4] = {
2319 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2323 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2325 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2328 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2330 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2333 static gen_helper_gvec_3 * const trn_fns[4] = {
2334 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2338 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2340 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2343 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2345 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2349 /* *** SVE Permute Vector - Predicated Group */
2352 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2354 static gen_helper_gvec_3 * const fns[4] = {
2355 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2357 return do_zpz_ool(s, a, fns[a->esz]);
2360 /* Call the helper that computes the ARM LastActiveElement pseudocode
2361 * function, scaled by the element size. This includes the not found
2362 * indication; e.g. not found for esz=3 is -8.
2364 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2366 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2367 * round up, as we do elsewhere, because we need the exact size.
2369 TCGv_ptr t_p = tcg_temp_new_ptr();
2370 TCGv_i32 t_desc;
2371 unsigned vsz = pred_full_reg_size(s);
2372 unsigned desc;
2374 desc = vsz - 2;
2375 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2377 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2378 t_desc = tcg_const_i32(desc);
2380 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2382 tcg_temp_free_i32(t_desc);
2383 tcg_temp_free_ptr(t_p);
2386 /* Increment LAST to the offset of the next element in the vector,
2387 * wrapping around to 0.
2389 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2391 unsigned vsz = vec_full_reg_size(s);
2393 tcg_gen_addi_i32(last, last, 1 << esz);
2394 if (is_power_of_2(vsz)) {
2395 tcg_gen_andi_i32(last, last, vsz - 1);
2396 } else {
2397 TCGv_i32 max = tcg_const_i32(vsz);
2398 TCGv_i32 zero = tcg_const_i32(0);
2399 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2400 tcg_temp_free_i32(max);
2401 tcg_temp_free_i32(zero);
2405 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2406 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2408 unsigned vsz = vec_full_reg_size(s);
2410 if (is_power_of_2(vsz)) {
2411 tcg_gen_andi_i32(last, last, vsz - 1);
2412 } else {
2413 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2414 TCGv_i32 zero = tcg_const_i32(0);
2415 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2416 tcg_temp_free_i32(max);
2417 tcg_temp_free_i32(zero);
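/* Worked example: for a 384-bit vector (vsz = 48, not a power of 2)
 * with esz = 3, a not-found indication (last < 0) is replaced by 40,
 * the byte offset of the final doubleword.  For power-of-2 sizes the
 * same wrap falls out of the mask: with vsz = 64, -8 & 63 == 56.
 */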
2421 /* Load an unsigned element of ESZ from BASE+OFS. */
2422 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2424 TCGv_i64 r = tcg_temp_new_i64();
2426 switch (esz) {
2427 case 0:
2428 tcg_gen_ld8u_i64(r, base, ofs);
2429 break;
2430 case 1:
2431 tcg_gen_ld16u_i64(r, base, ofs);
2432 break;
2433 case 2:
2434 tcg_gen_ld32u_i64(r, base, ofs);
2435 break;
2436 case 3:
2437 tcg_gen_ld_i64(r, base, ofs);
2438 break;
2439 default:
2440 g_assert_not_reached();
2442 return r;
2445 /* Load an unsigned element of ESZ from RM[LAST]. */
2446 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2447 int rm, int esz)
2449 TCGv_ptr p = tcg_temp_new_ptr();
2450 TCGv_i64 r;
2452 /* Convert the offset within the vector into an offset into ENV.
2453 * The final adjustment for the vector register base
2454 * is added via constant offset to the load.
2456 #ifdef HOST_WORDS_BIGENDIAN
2457 /* Adjust for element ordering. See vec_reg_offset. */
2458 if (esz < 3) {
2459 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2461 #endif
2462 tcg_gen_ext_i32_ptr(p, last);
2463 tcg_gen_add_ptr(p, p, cpu_env);
2465 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2466 tcg_temp_free_ptr(p);
2468 return r;
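/* On a big-endian host the bytes of each 64-bit host word are stored
 * in the reverse of the little-endian element order, so e.g. byte
 * element 0 lives at host offset 7 within its doubleword; XOR-ing
 * the index with 8 - (1 << esz) performs exactly that in-word swap
 * for every element size smaller than a doubleword.
 */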
2471 /* Compute CLAST for a Zreg. */
2472 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2474 TCGv_i32 last;
2475 TCGLabel *over;
2476 TCGv_i64 ele;
2477 unsigned vsz, esz = a->esz;
2479 if (!sve_access_check(s)) {
2480 return true;
2483 last = tcg_temp_local_new_i32();
2484 over = gen_new_label();
2486 find_last_active(s, last, esz, a->pg);
2488 /* There is of course no movcond for a 2048-bit vector,
2489 * so we must branch over the actual store.
2491 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2493 if (!before) {
2494 incr_last_active(s, last, esz);
2497 ele = load_last_active(s, last, a->rm, esz);
2498 tcg_temp_free_i32(last);
2500 vsz = vec_full_reg_size(s);
2501 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2502 tcg_temp_free_i64(ele);
2504 /* If this insn used MOVPRFX, we may need a second move. */
2505 if (a->rd != a->rn) {
2506 TCGLabel *done = gen_new_label();
2507 tcg_gen_br(done);
2509 gen_set_label(over);
2510 do_mov_z(s, a->rd, a->rn);
2512 gen_set_label(done);
2513 } else {
2514 gen_set_label(over);
2516 return true;
2519 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2521 return do_clast_vector(s, a, false);
2524 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2526 return do_clast_vector(s, a, true);
2529 /* Compute CLAST for a scalar. */
2530 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2531 bool before, TCGv_i64 reg_val)
2533 TCGv_i32 last = tcg_temp_new_i32();
2534 TCGv_i64 ele, cmp, zero;
2536 find_last_active(s, last, esz, pg);
2538 /* Extend the original value of last prior to incrementing. */
2539 cmp = tcg_temp_new_i64();
2540 tcg_gen_ext_i32_i64(cmp, last);
2542 if (!before) {
2543 incr_last_active(s, last, esz);
2546 /* The conceit here is that while last < 0 indicates not found, after
2547 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2548 * from which we can load garbage. We then discard the garbage with
2549 * a conditional move.
2551 ele = load_last_active(s, last, rm, esz);
2552 tcg_temp_free_i32(last);
2554 zero = tcg_const_i64(0);
2555 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2557 tcg_temp_free_i64(zero);
2558 tcg_temp_free_i64(cmp);
2559 tcg_temp_free_i64(ele);
2562 /* Compute CLAST for a Vreg. */
2563 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2565 if (sve_access_check(s)) {
2566 int esz = a->esz;
2567 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2570 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571 write_fp_dreg(s, a->rd, reg);
2572 tcg_temp_free_i64(reg);
2574 return true;
2577 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2579 return do_clast_fp(s, a, false);
2582 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2584 return do_clast_fp(s, a, true);
2587 /* Compute CLAST for a Xreg. */
2588 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2590 TCGv_i64 reg;
2592 if (!sve_access_check(s)) {
2593 return true;
2596 reg = cpu_reg(s, a->rd);
2597 switch (a->esz) {
2598 case 0:
2599 tcg_gen_ext8u_i64(reg, reg);
2600 break;
2601 case 1:
2602 tcg_gen_ext16u_i64(reg, reg);
2603 break;
2604 case 2:
2605 tcg_gen_ext32u_i64(reg, reg);
2606 break;
2607 case 3:
2608 break;
2609 default:
2610 g_assert_not_reached();
2613 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2614 return true;
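/* The zero-extension above implements the architected behaviour for
 * the case of no active elements: scalar CLASTA/CLASTB then return
 * the low ESZ bits of the old Xd value zero-extended, which is the
 * value that the movcond in do_clast_scalar falls back to.
 */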
2617 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2619 return do_clast_general(s, a, false);
2622 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2624 return do_clast_general(s, a, true);
2627 /* Compute LAST for a scalar. */
2628 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629 int pg, int rm, bool before)
2631 TCGv_i32 last = tcg_temp_new_i32();
2632 TCGv_i64 ret;
2634 find_last_active(s, last, esz, pg);
2635 if (before) {
2636 wrap_last_active(s, last, esz);
2637 } else {
2638 incr_last_active(s, last, esz);
2641 ret = load_last_active(s, last, rm, esz);
2642 tcg_temp_free_i32(last);
2643 return ret;
2646 /* Compute LAST for a Vreg. */
2647 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2649 if (sve_access_check(s)) {
2650 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651 write_fp_dreg(s, a->rd, val);
2652 tcg_temp_free_i64(val);
2654 return true;
2657 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2659 return do_last_fp(s, a, false);
2662 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2664 return do_last_fp(s, a, true);
2667 /* Compute LAST for a Xreg. */
2668 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2670 if (sve_access_check(s)) {
2671 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673 tcg_temp_free_i64(val);
2675 return true;
2678 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2680 return do_last_general(s, a, false);
2683 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2685 return do_last_general(s, a, true);
2688 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2690 if (sve_access_check(s)) {
2691 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2693 return true;
2696 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2698 if (sve_access_check(s)) {
2699 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702 tcg_temp_free_i64(t);
2704 return true;
2707 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2709 static gen_helper_gvec_3 * const fns[4] = {
2710 NULL,
2711 gen_helper_sve_revb_h,
2712 gen_helper_sve_revb_s,
2713 gen_helper_sve_revb_d,
2715 return do_zpz_ool(s, a, fns[a->esz]);
2718 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2720 static gen_helper_gvec_3 * const fns[4] = {
2721 NULL,
2722 NULL,
2723 gen_helper_sve_revh_s,
2724 gen_helper_sve_revh_d,
2726 return do_zpz_ool(s, a, fns[a->esz]);
2729 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2731 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2734 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2736 static gen_helper_gvec_3 * const fns[4] = {
2737 gen_helper_sve_rbit_b,
2738 gen_helper_sve_rbit_h,
2739 gen_helper_sve_rbit_s,
2740 gen_helper_sve_rbit_d,
2742 return do_zpz_ool(s, a, fns[a->esz]);
2745 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2747 if (sve_access_check(s)) {
2748 unsigned vsz = vec_full_reg_size(s);
2749 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750 vec_full_reg_offset(s, a->rn),
2751 vec_full_reg_offset(s, a->rm),
2752 pred_full_reg_offset(s, a->pg),
2753 vsz, vsz, a->esz, gen_helper_sve_splice);
2755 return true;
2759 /* *** SVE Integer Compare - Vectors Group */
2762 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2763 gen_helper_gvec_flags_4 *gen_fn)
2765 TCGv_ptr pd, zn, zm, pg;
2766 unsigned vsz;
2767 TCGv_i32 t;
2769 if (gen_fn == NULL) {
2770 return false;
2772 if (!sve_access_check(s)) {
2773 return true;
2776 vsz = vec_full_reg_size(s);
2777 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2778 pd = tcg_temp_new_ptr();
2779 zn = tcg_temp_new_ptr();
2780 zm = tcg_temp_new_ptr();
2781 pg = tcg_temp_new_ptr();
2783 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2784 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2785 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2786 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2788 gen_fn(t, pd, zn, zm, pg, t);
2790 tcg_temp_free_ptr(pd);
2791 tcg_temp_free_ptr(zn);
2792 tcg_temp_free_ptr(zm);
2793 tcg_temp_free_ptr(pg);
2795 do_pred_flags(t);
2797 tcg_temp_free_i32(t);
2798 return true;
2801 #define DO_PPZZ(NAME, name) \
2802 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
2804 static gen_helper_gvec_flags_4 * const fns[4] = { \
2805 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2806 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2807 }; \
2808 return do_ppzz_flags(s, a, fns[a->esz]); \
2811 DO_PPZZ(CMPEQ, cmpeq)
2812 DO_PPZZ(CMPNE, cmpne)
2813 DO_PPZZ(CMPGT, cmpgt)
2814 DO_PPZZ(CMPGE, cmpge)
2815 DO_PPZZ(CMPHI, cmphi)
2816 DO_PPZZ(CMPHS, cmphs)
2818 #undef DO_PPZZ
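/* For reference, DO_PPZZ(CMPEQ, cmpeq) above expands to:
 *
 *   static bool trans_CMPEQ_ppzz(DisasContext *s, arg_rprr_esz *a)
 *   {
 *       static gen_helper_gvec_flags_4 * const fns[4] = {
 *           gen_helper_sve_cmpeq_ppzz_b, gen_helper_sve_cmpeq_ppzz_h,
 *           gen_helper_sve_cmpeq_ppzz_s, gen_helper_sve_cmpeq_ppzz_d,
 *       };
 *       return do_ppzz_flags(s, a, fns[a->esz]);
 *   }
 */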
2820 #define DO_PPZW(NAME, name) \
2821 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
2823 static gen_helper_gvec_flags_4 * const fns[4] = { \
2824 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2825 gen_helper_sve_##name##_ppzw_s, NULL \
2826 }; \
2827 return do_ppzz_flags(s, a, fns[a->esz]); \
2830 DO_PPZW(CMPEQ, cmpeq)
2831 DO_PPZW(CMPNE, cmpne)
2832 DO_PPZW(CMPGT, cmpgt)
2833 DO_PPZW(CMPGE, cmpge)
2834 DO_PPZW(CMPHI, cmphi)
2835 DO_PPZW(CMPHS, cmphs)
2836 DO_PPZW(CMPLT, cmplt)
2837 DO_PPZW(CMPLE, cmple)
2838 DO_PPZW(CMPLO, cmplo)
2839 DO_PPZW(CMPLS, cmpls)
2841 #undef DO_PPZW
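/* Note that LT/LE/LO/LS appear only in this wide (ppzw) set: for
 * same-width operands those comparisons are realised by swapping Zn
 * and Zm and using GT/GE/HI/HS instead, but with a wide (64-bit) Zm
 * element the operands cannot be exchanged, so dedicated encodings
 * and helpers are required.
 */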
2844 /* *** SVE Integer Compare - Immediate Groups */
2847 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2848 gen_helper_gvec_flags_3 *gen_fn)
2850 TCGv_ptr pd, zn, pg;
2851 unsigned vsz;
2852 TCGv_i32 t;
2854 if (gen_fn == NULL) {
2855 return false;
2857 if (!sve_access_check(s)) {
2858 return true;
2861 vsz = vec_full_reg_size(s);
2862 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2863 pd = tcg_temp_new_ptr();
2864 zn = tcg_temp_new_ptr();
2865 pg = tcg_temp_new_ptr();
2867 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2868 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2869 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2871 gen_fn(t, pd, zn, pg, t);
2873 tcg_temp_free_ptr(pd);
2874 tcg_temp_free_ptr(zn);
2875 tcg_temp_free_ptr(pg);
2877 do_pred_flags(t);
2879 tcg_temp_free_i32(t);
2880 return true;
2883 #define DO_PPZI(NAME, name) \
2884 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
2886 static gen_helper_gvec_flags_3 * const fns[4] = { \
2887 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2888 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2889 }; \
2890 return do_ppzi_flags(s, a, fns[a->esz]); \
2893 DO_PPZI(CMPEQ, cmpeq)
2894 DO_PPZI(CMPNE, cmpne)
2895 DO_PPZI(CMPGT, cmpgt)
2896 DO_PPZI(CMPGE, cmpge)
2897 DO_PPZI(CMPHI, cmphi)
2898 DO_PPZI(CMPHS, cmphs)
2899 DO_PPZI(CMPLT, cmplt)
2900 DO_PPZI(CMPLE, cmple)
2901 DO_PPZI(CMPLO, cmplo)
2902 DO_PPZI(CMPLS, cmpls)
2904 #undef DO_PPZI
2907 /* *** SVE Partition Break Group */
2910 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2911 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2913 if (!sve_access_check(s)) {
2914 return true;
2917 unsigned vsz = pred_full_reg_size(s);
2919 /* Predicate sizes may be smaller and cannot use simd_desc. */
2920 TCGv_ptr d = tcg_temp_new_ptr();
2921 TCGv_ptr n = tcg_temp_new_ptr();
2922 TCGv_ptr m = tcg_temp_new_ptr();
2923 TCGv_ptr g = tcg_temp_new_ptr();
2924 TCGv_i32 t = tcg_const_i32(vsz - 2);
2926 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2927 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2928 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2929 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2931 if (a->s) {
2932 fn_s(t, d, n, m, g, t);
2933 do_pred_flags(t);
2934 } else {
2935 fn(d, n, m, g, t);
2937 tcg_temp_free_ptr(d);
2938 tcg_temp_free_ptr(n);
2939 tcg_temp_free_ptr(m);
2940 tcg_temp_free_ptr(g);
2941 tcg_temp_free_i32(t);
2942 return true;
2945 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2946 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2948 if (!sve_access_check(s)) {
2949 return true;
2952 unsigned vsz = pred_full_reg_size(s);
2954 /* Predicate sizes may be smaller and cannot use simd_desc. */
2955 TCGv_ptr d = tcg_temp_new_ptr();
2956 TCGv_ptr n = tcg_temp_new_ptr();
2957 TCGv_ptr g = tcg_temp_new_ptr();
2958 TCGv_i32 t = tcg_const_i32(vsz - 2);
2960 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2961 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2962 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2964 if (a->s) {
2965 fn_s(t, d, n, g, t);
2966 do_pred_flags(t);
2967 } else {
2968 fn(d, n, g, t);
2970 tcg_temp_free_ptr(d);
2971 tcg_temp_free_ptr(n);
2972 tcg_temp_free_ptr(g);
2973 tcg_temp_free_i32(t);
2974 return true;
2977 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2979 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2982 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2984 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2987 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2989 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2992 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2994 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2997 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2999 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3002 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
3004 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3007 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3009 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3013 /* *** SVE Predicate Count Group */
3016 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3018 unsigned psz = pred_full_reg_size(s);
3020 if (psz <= 8) {
3021 uint64_t psz_mask;
3023 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3024 if (pn != pg) {
3025 TCGv_i64 g = tcg_temp_new_i64();
3026 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3027 tcg_gen_and_i64(val, val, g);
3028 tcg_temp_free_i64(g);
3031 /* Reduce the pred_esz_masks value simply to reduce the
3032 * size of the code generated here.
3034 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3035 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3037 tcg_gen_ctpop_i64(val, val);
3038 } else {
3039 TCGv_ptr t_pn = tcg_temp_new_ptr();
3040 TCGv_ptr t_pg = tcg_temp_new_ptr();
3041 unsigned desc;
3042 TCGv_i32 t_desc;
3044 desc = psz - 2;
3045 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3047 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3048 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3049 t_desc = tcg_const_i32(desc);
3051 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3052 tcg_temp_free_ptr(t_pn);
3053 tcg_temp_free_ptr(t_pg);
3054 tcg_temp_free_i32(t_desc);
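/* Worked example of the fast path above: for a 128-bit vector psz is
 * 2, so psz_mask = 0xffff, and for esz = 2 (words) pred_esz_masks[2]
 * selects the flag bit of every 4-bit predicate slot; the AND leaves
 * one bit per active word element for ctpop to count.
 */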
3058 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3060 if (sve_access_check(s)) {
3061 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3063 return true;
3066 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3068 if (sve_access_check(s)) {
3069 TCGv_i64 reg = cpu_reg(s, a->rd);
3070 TCGv_i64 val = tcg_temp_new_i64();
3072 do_cntp(s, val, a->esz, a->pg, a->pg);
3073 if (a->d) {
3074 tcg_gen_sub_i64(reg, reg, val);
3075 } else {
3076 tcg_gen_add_i64(reg, reg, val);
3078 tcg_temp_free_i64(val);
3080 return true;
3083 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3085 if (a->esz == 0) {
3086 return false;
3088 if (sve_access_check(s)) {
3089 unsigned vsz = vec_full_reg_size(s);
3090 TCGv_i64 val = tcg_temp_new_i64();
3091 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3093 do_cntp(s, val, a->esz, a->pg, a->pg);
3094 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3095 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3097 return true;
3100 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3102 if (sve_access_check(s)) {
3103 TCGv_i64 reg = cpu_reg(s, a->rd);
3104 TCGv_i64 val = tcg_temp_new_i64();
3106 do_cntp(s, val, a->esz, a->pg, a->pg);
3107 do_sat_addsub_32(reg, val, a->u, a->d);
3109 return true;
3112 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3114 if (sve_access_check(s)) {
3115 TCGv_i64 reg = cpu_reg(s, a->rd);
3116 TCGv_i64 val = tcg_temp_new_i64();
3118 do_cntp(s, val, a->esz, a->pg, a->pg);
3119 do_sat_addsub_64(reg, val, a->u, a->d);
3121 return true;
3124 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3126 if (a->esz == 0) {
3127 return false;
3129 if (sve_access_check(s)) {
3130 TCGv_i64 val = tcg_temp_new_i64();
3131 do_cntp(s, val, a->esz, a->pg, a->pg);
3132 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3134 return true;
3138 /* *** SVE Integer Compare Scalars Group */
3141 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3143 if (!sve_access_check(s)) {
3144 return true;
3147 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3148 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3149 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3150 TCGv_i64 cmp = tcg_temp_new_i64();
3152 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3153 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3154 tcg_temp_free_i64(cmp);
3156 /* VF = !NF & !CF. */
3157 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3158 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3160 /* Both NF and VF actually look at bit 31. */
3161 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3162 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3163 return true;
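/* setcond produces 0 or 1, but as noted above the NF and VF flags
 * are sampled from bit 31, so the final negations turn the booleans
 * into 0 or -1, placing each flag's value in the sign bit.
 */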
3166 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3168 TCGv_i64 op0, op1, t0, t1, tmax;
3169 TCGv_i32 t2, t3;
3170 TCGv_ptr ptr;
3171 unsigned desc, vsz = vec_full_reg_size(s);
3172 TCGCond cond;
3174 if (!sve_access_check(s)) {
3175 return true;
3178 op0 = read_cpu_reg(s, a->rn, 1);
3179 op1 = read_cpu_reg(s, a->rm, 1);
3181 if (!a->sf) {
3182 if (a->u) {
3183 tcg_gen_ext32u_i64(op0, op0);
3184 tcg_gen_ext32u_i64(op1, op1);
3185 } else {
3186 tcg_gen_ext32s_i64(op0, op0);
3187 tcg_gen_ext32s_i64(op1, op1);
3191 /* For the helper, compress the different conditions into a computation
3192 * of the number of iterations for which the condition is true.
3194 t0 = tcg_temp_new_i64();
3195 t1 = tcg_temp_new_i64();
3196 tcg_gen_sub_i64(t0, op1, op0);
3198 tmax = tcg_const_i64(vsz >> a->esz);
3199 if (a->eq) {
3200 /* Equality means one more iteration. */
3201 tcg_gen_addi_i64(t0, t0, 1);
3203 /* If op1 is the maximum (un)signed integer (and the only time the addition
3204 * above could overflow), then we produce an all-true predicate by
3205 * setting the count to the vector length. This is because the
3206 * pseudocode is described as an increment + compare loop, and the
3207 * max integer would always compare true.
3209 tcg_gen_movi_i64(t1, (a->sf
3210 ? (a->u ? UINT64_MAX : INT64_MAX)
3211 : (a->u ? UINT32_MAX : INT32_MAX)));
3212 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3215 /* Bound to the maximum. */
3216 tcg_gen_umin_i64(t0, t0, tmax);
3217 tcg_temp_free_i64(tmax);
3219 /* Set the count to zero if the condition is false. */
3220 cond = (a->u
3221 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3222 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3223 tcg_gen_movi_i64(t1, 0);
3224 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3225 tcg_temp_free_i64(t1);
3227 /* Since we're bounded, pass as a 32-bit type. */
3228 t2 = tcg_temp_new_i32();
3229 tcg_gen_extrl_i64_i32(t2, t0);
3230 tcg_temp_free_i64(t0);
3232 /* Scale elements to bits. */
3233 tcg_gen_shli_i32(t2, t2, a->esz);
3235 desc = (vsz / 8) - 2;
3236 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3237 t3 = tcg_const_i32(desc);
3239 ptr = tcg_temp_new_ptr();
3240 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3242 gen_helper_sve_while(t2, ptr, t2, t3);
3243 do_pred_flags(t2);
3245 tcg_temp_free_ptr(ptr);
3246 tcg_temp_free_i32(t2);
3247 tcg_temp_free_i32(t3);
3248 return true;
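/* Worked example of the count computation: WHILELT (signed, !eq)
 * with op0 = 3, op1 = 7 and esz = 2 on a 128-bit vector gives
 * t0 = 7 - 3 = 4 iterations, bounded by tmax = 16 >> 2 = 4 elements;
 * since 3 < 7 the count survives the final movcond, and scaling by
 * esz turns it into 16 predicate bits for the helper to set.
 */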
3252 /* *** SVE Integer Wide Immediate - Unpredicated Group */
3255 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3257 if (a->esz == 0) {
3258 return false;
3260 if (sve_access_check(s)) {
3261 unsigned vsz = vec_full_reg_size(s);
3262 int dofs = vec_full_reg_offset(s, a->rd);
3263 uint64_t imm;
3265 /* Decode the VFP immediate. */
3266 imm = vfp_expand_imm(a->esz, a->imm);
3267 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3269 return true;
3272 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3274 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3275 return false;
3277 if (sve_access_check(s)) {
3278 unsigned vsz = vec_full_reg_size(s);
3279 int dofs = vec_full_reg_offset(s, a->rd);
3281 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3283 return true;
3286 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3288 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3289 return false;
3291 if (sve_access_check(s)) {
3292 unsigned vsz = vec_full_reg_size(s);
3293 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3294 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3296 return true;
3299 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3301 a->imm = -a->imm;
3302 return trans_ADD_zzi(s, a);
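/* Subtraction of an immediate reduces to addition of its negation;
 * the negation commutes with the per-element truncation performed by
 * the gvec expander, so this is correct for every element size.
 */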
3305 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3307 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3308 static const GVecGen2s op[4] = {
3309 { .fni8 = tcg_gen_vec_sub8_i64,
3310 .fniv = tcg_gen_sub_vec,
3311 .fno = gen_helper_sve_subri_b,
3312 .opt_opc = vecop_list,
3313 .vece = MO_8,
3314 .scalar_first = true },
3315 { .fni8 = tcg_gen_vec_sub16_i64,
3316 .fniv = tcg_gen_sub_vec,
3317 .fno = gen_helper_sve_subri_h,
3318 .opt_opc = vecop_list,
3319 .vece = MO_16,
3320 .scalar_first = true },
3321 { .fni4 = tcg_gen_sub_i32,
3322 .fniv = tcg_gen_sub_vec,
3323 .fno = gen_helper_sve_subri_s,
3324 .opt_opc = vecop_list,
3325 .vece = MO_32,
3326 .scalar_first = true },
3327 { .fni8 = tcg_gen_sub_i64,
3328 .fniv = tcg_gen_sub_vec,
3329 .fno = gen_helper_sve_subri_d,
3330 .opt_opc = vecop_list,
3331 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3332 .vece = MO_64,
3333 .scalar_first = true }
3336 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3337 return false;
3339 if (sve_access_check(s)) {
3340 unsigned vsz = vec_full_reg_size(s);
3341 TCGv_i64 c = tcg_const_i64(a->imm);
3342 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3343 vec_full_reg_offset(s, a->rn),
3344 vsz, vsz, c, &op[a->esz]);
3345 tcg_temp_free_i64(c);
3347 return true;
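/* In the GVecGen2s table above it is .scalar_first = true that makes
 * this the reversed subtract: the immediate becomes the first
 * operand of each per-element subtraction, i.e. Zd = imm - Zn,
 * rather than the Zn - imm computed by trans_SUB_zzi.
 */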
3350 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3352 if (sve_access_check(s)) {
3353 unsigned vsz = vec_full_reg_size(s);
3354 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3355 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3357 return true;
3360 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3362 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3363 return false;
3365 if (sve_access_check(s)) {
3366 TCGv_i64 val = tcg_const_i64(a->imm);
3367 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3368 tcg_temp_free_i64(val);
3370 return true;
3373 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3375 return do_zzi_sat(s, a, false, false);
3378 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3380 return do_zzi_sat(s, a, true, false);
3383 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3385 return do_zzi_sat(s, a, false, true);
3388 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3390 return do_zzi_sat(s, a, true, true);
3393 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3395 if (sve_access_check(s)) {
3396 unsigned vsz = vec_full_reg_size(s);
3397 TCGv_i64 c = tcg_const_i64(a->imm);
3399 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3400 vec_full_reg_offset(s, a->rn),
3401 c, vsz, vsz, 0, fn);
3402 tcg_temp_free_i64(c);
3404 return true;
3407 #define DO_ZZI(NAME, name) \
3408 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
3410 static gen_helper_gvec_2i * const fns[4] = { \
3411 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3412 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3413 }; \
3414 return do_zzi_ool(s, a, fns[a->esz]); \
3417 DO_ZZI(SMAX, smax)
3418 DO_ZZI(UMAX, umax)
3419 DO_ZZI(SMIN, smin)
3420 DO_ZZI(UMIN, umin)
3422 #undef DO_ZZI
3424 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3426 static gen_helper_gvec_3 * const fns[2][2] = {
3427 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3428 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3431 if (sve_access_check(s)) {
3432 unsigned vsz = vec_full_reg_size(s);
3433 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3434 vec_full_reg_offset(s, a->rn),
3435 vec_full_reg_offset(s, a->rm),
3436 vsz, vsz, 0, fns[a->u][a->sz]);
3438 return true;
3441 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3443 static gen_helper_gvec_3 * const fns[2][2] = {
3444 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3445 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3448 if (sve_access_check(s)) {
3449 unsigned vsz = vec_full_reg_size(s);
3450 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3451 vec_full_reg_offset(s, a->rn),
3452 vec_full_reg_offset(s, a->rm),
3453 vsz, vsz, a->index, fns[a->u][a->sz]);
3455 return true;
3460 /* *** SVE Floating Point Multiply-Add Indexed Group */
3463 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3465 static gen_helper_gvec_4_ptr * const fns[3] = {
3466 gen_helper_gvec_fmla_idx_h,
3467 gen_helper_gvec_fmla_idx_s,
3468 gen_helper_gvec_fmla_idx_d,
3471 if (sve_access_check(s)) {
3472 unsigned vsz = vec_full_reg_size(s);
3473 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3474 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3475 vec_full_reg_offset(s, a->rn),
3476 vec_full_reg_offset(s, a->rm),
3477 vec_full_reg_offset(s, a->ra),
3478 status, vsz, vsz, (a->index << 1) | a->sub,
3479 fns[a->esz - 1]);
3480 tcg_temp_free_ptr(status);
3482 return true;
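/* The simd_data word built above packs two decode fields for the
 * helper to unpack: bit 0 carries a->sub (distinguishing FMLS from
 * FMLA) and the remaining bits the index of the multiplicand
 * element.
 */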
3486 /* *** SVE Floating Point Multiply Indexed Group */
3489 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3491 static gen_helper_gvec_3_ptr * const fns[3] = {
3492 gen_helper_gvec_fmul_idx_h,
3493 gen_helper_gvec_fmul_idx_s,
3494 gen_helper_gvec_fmul_idx_d,
3497 if (sve_access_check(s)) {
3498 unsigned vsz = vec_full_reg_size(s);
3499 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3500 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3501 vec_full_reg_offset(s, a->rn),
3502 vec_full_reg_offset(s, a->rm),
3503 status, vsz, vsz, a->index, fns[a->esz - 1]);
3504 tcg_temp_free_ptr(status);
3506 return true;
3510 /* *** SVE Floating Point Fast Reduction Group */
3513 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3514 TCGv_ptr, TCGv_i32);
3516 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3517 gen_helper_fp_reduce *fn)
3519 unsigned vsz = vec_full_reg_size(s);
3520 unsigned p2vsz = pow2ceil(vsz);
3521 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3522 TCGv_ptr t_zn, t_pg, status;
3523 TCGv_i64 temp;
3525 temp = tcg_temp_new_i64();
3526 t_zn = tcg_temp_new_ptr();
3527 t_pg = tcg_temp_new_ptr();
3529 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3530 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3531 status = get_fpstatus_ptr(a->esz == MO_16);
3533 fn(temp, t_zn, t_pg, status, t_desc);
3534 tcg_temp_free_ptr(t_zn);
3535 tcg_temp_free_ptr(t_pg);
3536 tcg_temp_free_ptr(status);
3537 tcg_temp_free_i32(t_desc);
3539 write_fp_dreg(s, a->rd, temp);
3540 tcg_temp_free_i64(temp);
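/* The maxsz field is rounded up to a power of two (p2vsz) so that
 * the out-of-line helper can perform a balanced pairwise reduction
 * over the padded element array, with inactive and padding lanes
 * presumably supplied with the operation's identity value.
 */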
3543 #define DO_VPZ(NAME, name) \
3544 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3546 static gen_helper_fp_reduce * const fns[3] = { \
3547 gen_helper_sve_##name##_h, \
3548 gen_helper_sve_##name##_s, \
3549 gen_helper_sve_##name##_d, \
3550 }; \
3551 if (a->esz == 0) { \
3552 return false; \
3554 if (sve_access_check(s)) { \
3555 do_reduce(s, a, fns[a->esz - 1]); \
3557 return true; \
3560 DO_VPZ(FADDV, faddv)
3561 DO_VPZ(FMINNMV, fminnmv)
3562 DO_VPZ(FMAXNMV, fmaxnmv)
3563 DO_VPZ(FMINV, fminv)
3564 DO_VPZ(FMAXV, fmaxv)
3567 /* *** SVE Floating Point Unary Operations - Unpredicated Group */
3570 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3572 unsigned vsz = vec_full_reg_size(s);
3573 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3575 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3576 vec_full_reg_offset(s, a->rn),
3577 status, vsz, vsz, 0, fn);
3578 tcg_temp_free_ptr(status);
3581 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3583 static gen_helper_gvec_2_ptr * const fns[3] = {
3584 gen_helper_gvec_frecpe_h,
3585 gen_helper_gvec_frecpe_s,
3586 gen_helper_gvec_frecpe_d,
3588 if (a->esz == 0) {
3589 return false;
3591 if (sve_access_check(s)) {
3592 do_zz_fp(s, a, fns[a->esz - 1]);
3594 return true;
3597 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3599 static gen_helper_gvec_2_ptr * const fns[3] = {
3600 gen_helper_gvec_frsqrte_h,
3601 gen_helper_gvec_frsqrte_s,
3602 gen_helper_gvec_frsqrte_d,
3604 if (a->esz == 0) {
3605 return false;
3607 if (sve_access_check(s)) {
3608 do_zz_fp(s, a, fns[a->esz - 1]);
3610 return true;
3614 /* *** SVE Floating Point Compare with Zero Group */
3617 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3618 gen_helper_gvec_3_ptr *fn)
3620 unsigned vsz = vec_full_reg_size(s);
3621 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3623 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3624 vec_full_reg_offset(s, a->rn),
3625 pred_full_reg_offset(s, a->pg),
3626 status, vsz, vsz, 0, fn);
3627 tcg_temp_free_ptr(status);
3630 #define DO_PPZ(NAME, name) \
3631 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3633 static gen_helper_gvec_3_ptr * const fns[3] = { \
3634 gen_helper_sve_##name##_h, \
3635 gen_helper_sve_##name##_s, \
3636 gen_helper_sve_##name##_d, \
3637 }; \
3638 if (a->esz == 0) { \
3639 return false; \
3641 if (sve_access_check(s)) { \
3642 do_ppz_fp(s, a, fns[a->esz - 1]); \
3644 return true; \
3647 DO_PPZ(FCMGE_ppz0, fcmge0)
3648 DO_PPZ(FCMGT_ppz0, fcmgt0)
3649 DO_PPZ(FCMLE_ppz0, fcmle0)
3650 DO_PPZ(FCMLT_ppz0, fcmlt0)
3651 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3652 DO_PPZ(FCMNE_ppz0, fcmne0)
3654 #undef DO_PPZ
3657 /* *** SVE floating-point trig multiply-add coefficient */
3660 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3662 static gen_helper_gvec_3_ptr * const fns[3] = {
3663 gen_helper_sve_ftmad_h,
3664 gen_helper_sve_ftmad_s,
3665 gen_helper_sve_ftmad_d,
3668 if (a->esz == 0) {
3669 return false;
3671 if (sve_access_check(s)) {
3672 unsigned vsz = vec_full_reg_size(s);
3673 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3674 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3675 vec_full_reg_offset(s, a->rn),
3676 vec_full_reg_offset(s, a->rm),
3677 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3678 tcg_temp_free_ptr(status);
3680 return true;
3684 /* *** SVE Floating Point Accumulating Reduction Group */
3687 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3689 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3690 TCGv_ptr, TCGv_ptr, TCGv_i32);
3691 static fadda_fn * const fns[3] = {
3692 gen_helper_sve_fadda_h,
3693 gen_helper_sve_fadda_s,
3694 gen_helper_sve_fadda_d,
3696 unsigned vsz = vec_full_reg_size(s);
3697 TCGv_ptr t_rm, t_pg, t_fpst;
3698 TCGv_i64 t_val;
3699 TCGv_i32 t_desc;
3701 if (a->esz == 0) {
3702 return false;
3704 if (!sve_access_check(s)) {
3705 return true;
3708 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3709 t_rm = tcg_temp_new_ptr();
3710 t_pg = tcg_temp_new_ptr();
3711 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3712 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3713 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3714 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3716 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3718 tcg_temp_free_i32(t_desc);
3719 tcg_temp_free_ptr(t_fpst);
3720 tcg_temp_free_ptr(t_pg);
3721 tcg_temp_free_ptr(t_rm);
3723 write_fp_dreg(s, a->rd, t_val);
3724 tcg_temp_free_i64(t_val);
3725 return true;
3729 /* *** SVE Floating Point Arithmetic - Unpredicated Group */
3732 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3733 gen_helper_gvec_3_ptr *fn)
3735 if (fn == NULL) {
3736 return false;
3738 if (sve_access_check(s)) {
3739 unsigned vsz = vec_full_reg_size(s);
3740 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3741 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3742 vec_full_reg_offset(s, a->rn),
3743 vec_full_reg_offset(s, a->rm),
3744 status, vsz, vsz, 0, fn);
3745 tcg_temp_free_ptr(status);
3747 return true;
3751 #define DO_FP3(NAME, name) \
3752 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
3754 static gen_helper_gvec_3_ptr * const fns[4] = { \
3755 NULL, gen_helper_gvec_##name##_h, \
3756 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3757 }; \
3758 return do_zzz_fp(s, a, fns[a->esz]); \
3761 DO_FP3(FADD_zzz, fadd)
3762 DO_FP3(FSUB_zzz, fsub)
3763 DO_FP3(FMUL_zzz, fmul)
3764 DO_FP3(FTSMUL, ftsmul)
3765 DO_FP3(FRECPS, recps)
3766 DO_FP3(FRSQRTS, rsqrts)
3768 #undef DO_FP3
3771 /* *** SVE Floating Point Arithmetic - Predicated Group */
3774 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3775 gen_helper_gvec_4_ptr *fn)
3777 if (fn == NULL) {
3778 return false;
3780 if (sve_access_check(s)) {
3781 unsigned vsz = vec_full_reg_size(s);
3782 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3783 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3784 vec_full_reg_offset(s, a->rn),
3785 vec_full_reg_offset(s, a->rm),
3786 pred_full_reg_offset(s, a->pg),
3787 status, vsz, vsz, 0, fn);
3788 tcg_temp_free_ptr(status);
3790 return true;
3793 #define DO_FP3(NAME, name) \
3794 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
3796 static gen_helper_gvec_4_ptr * const fns[4] = { \
3797 NULL, gen_helper_sve_##name##_h, \
3798 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3799 }; \
3800 return do_zpzz_fp(s, a, fns[a->esz]); \
3803 DO_FP3(FADD_zpzz, fadd)
3804 DO_FP3(FSUB_zpzz, fsub)
3805 DO_FP3(FMUL_zpzz, fmul)
3806 DO_FP3(FMIN_zpzz, fmin)
3807 DO_FP3(FMAX_zpzz, fmax)
3808 DO_FP3(FMINNM_zpzz, fminnum)
3809 DO_FP3(FMAXNM_zpzz, fmaxnum)
3810 DO_FP3(FABD, fabd)
3811 DO_FP3(FSCALE, fscalbn)
3812 DO_FP3(FDIV, fdiv)
3813 DO_FP3(FMULX, fmulx)
3815 #undef DO_FP3
3817 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3818 TCGv_i64, TCGv_ptr, TCGv_i32);
3820 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3821 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3823 unsigned vsz = vec_full_reg_size(s);
3824 TCGv_ptr t_zd, t_zn, t_pg, status;
3825 TCGv_i32 desc;
3827 t_zd = tcg_temp_new_ptr();
3828 t_zn = tcg_temp_new_ptr();
3829 t_pg = tcg_temp_new_ptr();
3830 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3831 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3832 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3834 status = get_fpstatus_ptr(is_fp16);
3835 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3836 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3838 tcg_temp_free_i32(desc);
3839 tcg_temp_free_ptr(status);
3840 tcg_temp_free_ptr(t_pg);
3841 tcg_temp_free_ptr(t_zn);
3842 tcg_temp_free_ptr(t_zd);
3845 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3846 gen_helper_sve_fp2scalar *fn)
3848 TCGv_i64 temp = tcg_const_i64(imm);
3849 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3850 tcg_temp_free_i64(temp);
3853 #define DO_FP_IMM(NAME, name, const0, const1) \
3854 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
3856 static gen_helper_sve_fp2scalar * const fns[3] = { \
3857 gen_helper_sve_##name##_h, \
3858 gen_helper_sve_##name##_s, \
3859 gen_helper_sve_##name##_d \
3860 }; \
3861 static uint64_t const val[3][2] = { \
3862 { float16_##const0, float16_##const1 }, \
3863 { float32_##const0, float32_##const1 }, \
3864 { float64_##const0, float64_##const1 }, \
3865 }; \
3866 if (a->esz == 0) { \
3867 return false; \
3869 if (sve_access_check(s)) { \
3870 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3872 return true; \
3875 #define float16_two make_float16(0x4000)
3876 #define float32_two make_float32(0x40000000)
3877 #define float64_two make_float64(0x4000000000000000ULL)
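/* softfloat.h provides float*_half and float*_one but no constant
 * two, so it is spelled out here: sign 0, mantissa 0, and a biased
 * exponent one greater than that of 1.0 -- 0x4000 for the 5-bit
 * float16 exponent, 0x40000000 for float32, and likewise for
 * float64.
 */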
3879 DO_FP_IMM(FADD, fadds, half, one)
3880 DO_FP_IMM(FSUB, fsubs, half, one)
3881 DO_FP_IMM(FMUL, fmuls, half, two)
3882 DO_FP_IMM(FSUBR, fsubrs, half, one)
3883 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3884 DO_FP_IMM(FMINNM, fminnms, zero, one)
3885 DO_FP_IMM(FMAX, fmaxs, zero, one)
3886 DO_FP_IMM(FMIN, fmins, zero, one)
3888 #undef DO_FP_IMM
3890 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3891 gen_helper_gvec_4_ptr *fn)
3893 if (fn == NULL) {
3894 return false;
3896 if (sve_access_check(s)) {
3897 unsigned vsz = vec_full_reg_size(s);
3898 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3899 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3900 vec_full_reg_offset(s, a->rn),
3901 vec_full_reg_offset(s, a->rm),
3902 pred_full_reg_offset(s, a->pg),
3903 status, vsz, vsz, 0, fn);
3904 tcg_temp_free_ptr(status);
3906 return true;
3909 #define DO_FPCMP(NAME, name) \
3910 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
3912 static gen_helper_gvec_4_ptr * const fns[4] = { \
3913 NULL, gen_helper_sve_##name##_h, \
3914 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3915 }; \
3916 return do_fp_cmp(s, a, fns[a->esz]); \
3919 DO_FPCMP(FCMGE, fcmge)
3920 DO_FPCMP(FCMGT, fcmgt)
3921 DO_FPCMP(FCMEQ, fcmeq)
3922 DO_FPCMP(FCMNE, fcmne)
3923 DO_FPCMP(FCMUO, fcmuo)
3924 DO_FPCMP(FACGE, facge)
3925 DO_FPCMP(FACGT, facgt)
3927 #undef DO_FPCMP
3929 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3931 static gen_helper_gvec_4_ptr * const fns[3] = {
3932 gen_helper_sve_fcadd_h,
3933 gen_helper_sve_fcadd_s,
3934 gen_helper_sve_fcadd_d
3937 if (a->esz == 0) {
3938 return false;
3940 if (sve_access_check(s)) {
3941 unsigned vsz = vec_full_reg_size(s);
3942 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3943 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3944 vec_full_reg_offset(s, a->rn),
3945 vec_full_reg_offset(s, a->rm),
3946 pred_full_reg_offset(s, a->pg),
3947 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3948 tcg_temp_free_ptr(status);
3950 return true;
3953 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3954 gen_helper_gvec_5_ptr *fn)
3956 if (a->esz == 0) {
3957 return false;
3959 if (sve_access_check(s)) {
3960 unsigned vsz = vec_full_reg_size(s);
3961 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3962 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3963 vec_full_reg_offset(s, a->rn),
3964 vec_full_reg_offset(s, a->rm),
3965 vec_full_reg_offset(s, a->ra),
3966 pred_full_reg_offset(s, a->pg),
3967 status, vsz, vsz, 0, fn);
3968 tcg_temp_free_ptr(status);
3970 return true;
3973 #define DO_FMLA(NAME, name) \
3974 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
3976 static gen_helper_gvec_5_ptr * const fns[4] = { \
3977 NULL, gen_helper_sve_##name##_h, \
3978 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3979 }; \
3980 return do_fmla(s, a, fns[a->esz]); \
3983 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3984 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3985 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3986 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3988 #undef DO_FMLA
3990 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
3992 static gen_helper_gvec_5_ptr * const fns[4] = {
3993 NULL,
3994 gen_helper_sve_fcmla_zpzzz_h,
3995 gen_helper_sve_fcmla_zpzzz_s,
3996 gen_helper_sve_fcmla_zpzzz_d,
3999 if (a->esz == 0) {
4000 return false;
4002 if (sve_access_check(s)) {
4003 unsigned vsz = vec_full_reg_size(s);
4004 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4005 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4006 vec_full_reg_offset(s, a->rn),
4007 vec_full_reg_offset(s, a->rm),
4008 vec_full_reg_offset(s, a->ra),
4009 pred_full_reg_offset(s, a->pg),
4010 status, vsz, vsz, a->rot, fns[a->esz]);
4011 tcg_temp_free_ptr(status);
4013 return true;
4016 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4018 static gen_helper_gvec_3_ptr * const fns[2] = {
4019 gen_helper_gvec_fcmlah_idx,
4020 gen_helper_gvec_fcmlas_idx,
4023 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4024 tcg_debug_assert(a->rd == a->ra);
4025 if (sve_access_check(s)) {
4026 unsigned vsz = vec_full_reg_size(s);
4027 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4028 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4029 vec_full_reg_offset(s, a->rn),
4030 vec_full_reg_offset(s, a->rm),
4031 status, vsz, vsz,
4032 a->index * 4 + a->rot,
4033 fns[a->esz - 1]);
4034 tcg_temp_free_ptr(status);
4036 return true;
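/* As for FMLA above, the simd_data word multiplexes decode fields:
 * the low two bits carry the rotation and the bits above them the
 * element index, i.e. data = index * 4 + rot.
 */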
4040 /* *** SVE Floating Point Unary Operations Predicated Group */
4043 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4044 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4046 if (sve_access_check(s)) {
4047 unsigned vsz = vec_full_reg_size(s);
4048 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4049 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4050 vec_full_reg_offset(s, rn),
4051 pred_full_reg_offset(s, pg),
4052 status, vsz, vsz, 0, fn);
4053 tcg_temp_free_ptr(status);
4055 return true;
4058 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4060 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4063 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4065 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4068 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4070 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4073 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4075 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4078 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4080 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4083 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4085 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4088 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4090 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4093 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4095 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4098 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4100 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4103 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4105 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4108 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4110 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4113 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4115 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4118 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4120 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4123 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4125 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4128 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4130 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4133 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4135 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4138 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4140 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4143 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4145 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4148 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4150 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4153 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4155 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4158 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4159 gen_helper_sve_frint_h,
4160 gen_helper_sve_frint_s,
4161 gen_helper_sve_frint_d
4164 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4166 if (a->esz == 0) {
4167 return false;
4169 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4170 frint_fns[a->esz - 1]);
4173 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4175 static gen_helper_gvec_3_ptr * const fns[3] = {
4176 gen_helper_sve_frintx_h,
4177 gen_helper_sve_frintx_s,
4178 gen_helper_sve_frintx_d
4180 if (a->esz == 0) {
4181 return false;
4183 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4186 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4188 if (a->esz == 0) {
4189 return false;
4191 if (sve_access_check(s)) {
4192 unsigned vsz = vec_full_reg_size(s);
4193 TCGv_i32 tmode = tcg_const_i32(mode);
4194 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4196 gen_helper_set_rmode(tmode, tmode, status);
4198 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4199 vec_full_reg_offset(s, a->rn),
4200 pred_full_reg_offset(s, a->pg),
4201 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4203 gen_helper_set_rmode(tmode, tmode, status);
4204 tcg_temp_free_i32(tmode);
4205 tcg_temp_free_ptr(status);
4207 return true;
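/* gen_helper_set_rmode installs the requested rounding mode in the
 * fp status and returns the previous mode into its first argument,
 * so the second call with the same temporary restores the original
 * mode after the rounding operation has been emitted.
 */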
4210 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4212 return do_frint_mode(s, a, float_round_nearest_even);
4215 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4217 return do_frint_mode(s, a, float_round_up);
4220 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4222 return do_frint_mode(s, a, float_round_down);
4225 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4227 return do_frint_mode(s, a, float_round_to_zero);
4230 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4232 return do_frint_mode(s, a, float_round_ties_away);
4235 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4237 static gen_helper_gvec_3_ptr * const fns[3] = {
4238 gen_helper_sve_frecpx_h,
4239 gen_helper_sve_frecpx_s,
4240 gen_helper_sve_frecpx_d
4242 if (a->esz == 0) {
4243 return false;
4245 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4248 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4250 static gen_helper_gvec_3_ptr * const fns[3] = {
4251 gen_helper_sve_fsqrt_h,
4252 gen_helper_sve_fsqrt_s,
4253 gen_helper_sve_fsqrt_d
4255 if (a->esz == 0) {
4256 return false;
4258 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4261 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4263 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4266 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4268 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4271 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4273 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4276 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4278 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4281 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4283 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4286 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4288 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4291 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4293 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4296 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4298 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4301 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4303 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4306 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4308 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4311 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4313 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4316 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4318 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4321 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4323 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4326 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4328 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4332 /* *** SVE Memory - 32-bit Gather and Unsized Contiguous Group */
4335 /* Subroutine loading a vector register at VOFS of LEN bytes.
4336 * The load should begin at the address Rn + IMM.
4339 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4341 int len_align = QEMU_ALIGN_DOWN(len, 8);
4342 int len_remain = len % 8;
4343 int nparts = len / 8 + ctpop8(len_remain);
4344 int midx = get_mem_index(s);
4345 TCGv_i64 addr, t0, t1;
4347 addr = tcg_temp_new_i64();
4348 t0 = tcg_temp_new_i64();
4350 /* Note that unpredicated load/store of vector/predicate registers
4351 * are defined as a stream of bytes, which equates to little-endian
4352 * operations on larger quantities. There is no nice way to force
4353 * a little-endian load for aarch64_be-linux-user out of line.
4355 * Attempt to keep code expansion to a minimum by limiting the
4356 * amount of unrolling done.
4358 if (nparts <= 4) {
4359 int i;
4361 for (i = 0; i < len_align; i += 8) {
4362 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4363 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4364 tcg_gen_st_i64(t0, cpu_env, vofs + i);
4366 } else {
4367 TCGLabel *loop = gen_new_label();
4368 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4370 gen_set_label(loop);
4372 /* Minimize the number of local temps that must be re-read from
4373 * the stack each iteration. Instead, re-compute values other
4374 * than the loop counter.
4376 tp = tcg_temp_new_ptr();
4377 tcg_gen_addi_ptr(tp, i, imm);
4378 tcg_gen_extu_ptr_i64(addr, tp);
4379 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4381 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4383 tcg_gen_add_ptr(tp, cpu_env, i);
4384 tcg_gen_addi_ptr(i, i, 8);
4385 tcg_gen_st_i64(t0, tp, vofs);
4386 tcg_temp_free_ptr(tp);
4388 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4389 tcg_temp_free_ptr(i);
4392 /* Predicate register loads can be any multiple of 2 bytes.
4393 * Note that we still store the entire 64-bit unit into cpu_env.
4395 if (len_remain) {
4396 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
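/* A remainder of 2, 4 or 8 becomes a single little-endian load:
 * ctz32(len_remain) is exactly the MO_SIZE code (1, 2 or 3).  A
 * remainder of 6 has no single-access equivalent and is split into
 * a 4-byte plus a 2-byte load below.
 */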
4398 switch (len_remain) {
4399 case 2:
4400 case 4:
4401 case 8:
4402 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4403 break;
4405 case 6:
4406 t1 = tcg_temp_new_i64();
4407 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4408 tcg_gen_addi_i64(addr, addr, 4);
4409 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4410 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4411 tcg_temp_free_i64(t1);
4412 break;
4414 default:
4415 g_assert_not_reached();
4417 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4419 tcg_temp_free_i64(addr);
4420 tcg_temp_free_i64(t0);
4423 /* Similarly for stores. */
4424 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4426 int len_align = QEMU_ALIGN_DOWN(len, 8);
4427 int len_remain = len % 8;
4428 int nparts = len / 8 + ctpop8(len_remain);
4429 int midx = get_mem_index(s);
4430 TCGv_i64 addr, t0;
4432 addr = tcg_temp_new_i64();
4433 t0 = tcg_temp_new_i64();
4435 /* Note that unpredicated load/store of vector/predicate registers
4436 * are defined as a stream of bytes, which equates to little-endian
4437 * operations on larger quantities. There is no nice way to force
4438 * a little-endian store for aarch64_be-linux-user out of line.
4440 * Attempt to keep code expansion to a minimum by limiting the
4441 * amount of unrolling done.
4443 if (nparts <= 4) {
4444 int i;
4446 for (i = 0; i < len_align; i += 8) {
4447 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4448 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4449 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4451 } else {
4452 TCGLabel *loop = gen_new_label();
4453 TCGv_ptr t2, i = tcg_const_local_ptr(0);
4455 gen_set_label(loop);
4457 t2 = tcg_temp_new_ptr();
4458 tcg_gen_add_ptr(t2, cpu_env, i);
4459 tcg_gen_ld_i64(t0, t2, vofs);
4461 /* Minimize the number of local temps that must be re-read from
4462 * the stack each iteration. Instead, re-compute values other
4463 * than the loop counter.
4465 tcg_gen_addi_ptr(t2, i, imm);
4466 tcg_gen_extu_ptr_i64(addr, t2);
4467 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4468 tcg_temp_free_ptr(t2);
4470 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4472 tcg_gen_addi_ptr(i, i, 8);
4474 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4475 tcg_temp_free_ptr(i);
4478 /* Predicate register stores can be any multiple of 2 bytes. */
4479 if (len_remain) {
4480 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4481 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4483 switch (len_remain) {
4484 case 2:
4485 case 4:
4486 case 8:
4487 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4488 break;
4490 case 6:
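/* No single access covers 6 bytes: store the low 32 bits, then bits [47:32]. */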
4491 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4492 tcg_gen_addi_i64(addr, addr, 4);
4493 tcg_gen_shri_i64(t0, t0, 32);
4494 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4495 break;
4497 default:
4498 g_assert_not_reached();
4501 tcg_temp_free_i64(addr);
4502 tcg_temp_free_i64(t0);
4505 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4507 if (sve_access_check(s)) {
4508 int size = vec_full_reg_size(s);
4509 int off = vec_full_reg_offset(s, a->rd);
4510 do_ldr(s, off, size, a->rn, a->imm * size);
4512 return true;
4515 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4517 if (sve_access_check(s)) {
4518 int size = pred_full_reg_size(s);
4519 int off = pred_full_reg_offset(s, a->rd);
4520 do_ldr(s, off, size, a->rn, a->imm * size);
4522 return true;
4525 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4527 if (sve_access_check(s)) {
4528 int size = vec_full_reg_size(s);
4529 int off = vec_full_reg_offset(s, a->rd);
4530 do_str(s, off, size, a->rn, a->imm * size);
4532 return true;
4535 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4537 if (sve_access_check(s)) {
4538 int size = pred_full_reg_size(s);
4539 int off = pred_full_reg_offset(s, a->rd);
4540 do_str(s, off, size, a->rn, a->imm * size);
4542 return true;
4546 *** SVE Memory - Contiguous Load Group
4549 /* The memory mode of the dtype. */
4550 static const MemOp dtype_mop[16] = {
4551 MO_UB, MO_UB, MO_UB, MO_UB,
4552 MO_SL, MO_UW, MO_UW, MO_UW,
4553 MO_SW, MO_SW, MO_UL, MO_UL,
4554 MO_SB, MO_SB, MO_SB, MO_Q
4557 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4559 /* The vector element size of dtype. */
4560 static const uint8_t dtype_esz[16] = {
4561 0, 1, 2, 3,
4562 3, 1, 2, 3,
4563 3, 2, 2, 3,
4564 3, 2, 1, 3
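/* Worked example for the two tables above: dtype 13 is a signed-byte
 * load to 32-bit elements (LD1SB to .S) -- dtype_mop[13] = MO_SB, so
 * msz = 0, and dtype_esz[13] = 2; it maps to gen_helper_sve_ld1bss_r
 * in the tables below.
 */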
4567 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4568 int dtype, gen_helper_gvec_mem *fn)
4570 unsigned vsz = vec_full_reg_size(s);
4571 TCGv_ptr t_pg;
4572 TCGv_i32 t_desc;
4573 int desc;
4575 /* For e.g. LD4, there are not enough arguments to pass all 4
4576 * registers as pointers, so encode the regno into the data field.
4577 * For consistency, do this even for LD1.
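 * The helpers are expected to recover the register number with
 * simd_data(desc) and locate the registers themselves via cpu_env.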
4579 desc = simd_desc(vsz, vsz, zt);
4580 t_desc = tcg_const_i32(desc);
4581 t_pg = tcg_temp_new_ptr();
4583 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4584 fn(cpu_env, t_pg, addr, t_desc);
4586 tcg_temp_free_ptr(t_pg);
4587 tcg_temp_free_i32(t_desc);
4590 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4591 TCGv_i64 addr, int dtype, int nreg)
4593 static gen_helper_gvec_mem * const fns[2][16][4] = {
4594 /* Little-endian */
4595 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4596 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4597 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4598 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4599 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4601 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4602 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4603 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4604 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4605 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4607 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4608 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4609 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4610 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4611 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4613 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4614 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4615 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4616 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4617 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4619 /* Big-endian */
4620 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4621 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4622 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4623 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4624 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4626 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4627 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4628 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4629 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4630 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4632 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4633 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4634 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4635 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4636 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4638 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4639 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4640 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4641 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4642 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
4644 gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
4646 /* While there are holes in the table, they are not
4647 * accessible via the instruction encoding.
4649 assert(fn != NULL);
4650 do_mem_zpa(s, zt, pg, addr, dtype, fn);
4653 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4655 if (a->rm == 31) {
4656 return false;
4658 if (sve_access_check(s)) {
4659 TCGv_i64 addr = new_tmp_a64(s);
4660 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4661 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4662 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4664 return true;
4667 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4669 if (sve_access_check(s)) {
4670 int vsz = vec_full_reg_size(s);
4671 int elements = vsz >> dtype_esz[a->dtype];
4672 TCGv_i64 addr = new_tmp_a64(s);
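/* The immediate indexes whole (multi-)register blocks.  E.g. with
 * 256-bit vectors, LD1W has elements = 32 >> 2 = 8, so imm = 1
 * advances by (1 * 8 * 1) << 2 = 32 bytes, exactly one vector.
 */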
4674 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4675 (a->imm * elements * (a->nreg + 1))
4676 << dtype_msz(a->dtype));
4677 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4679 return true;
4682 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4684 static gen_helper_gvec_mem * const fns[2][16] = {
4685 /* Little-endian */
4686 { gen_helper_sve_ldff1bb_r,
4687 gen_helper_sve_ldff1bhu_r,
4688 gen_helper_sve_ldff1bsu_r,
4689 gen_helper_sve_ldff1bdu_r,
4691 gen_helper_sve_ldff1sds_le_r,
4692 gen_helper_sve_ldff1hh_le_r,
4693 gen_helper_sve_ldff1hsu_le_r,
4694 gen_helper_sve_ldff1hdu_le_r,
4696 gen_helper_sve_ldff1hds_le_r,
4697 gen_helper_sve_ldff1hss_le_r,
4698 gen_helper_sve_ldff1ss_le_r,
4699 gen_helper_sve_ldff1sdu_le_r,
4701 gen_helper_sve_ldff1bds_r,
4702 gen_helper_sve_ldff1bss_r,
4703 gen_helper_sve_ldff1bhs_r,
4704 gen_helper_sve_ldff1dd_le_r },
4706 /* Big-endian */
4707 { gen_helper_sve_ldff1bb_r,
4708 gen_helper_sve_ldff1bhu_r,
4709 gen_helper_sve_ldff1bsu_r,
4710 gen_helper_sve_ldff1bdu_r,
4712 gen_helper_sve_ldff1sds_be_r,
4713 gen_helper_sve_ldff1hh_be_r,
4714 gen_helper_sve_ldff1hsu_be_r,
4715 gen_helper_sve_ldff1hdu_be_r,
4717 gen_helper_sve_ldff1hds_be_r,
4718 gen_helper_sve_ldff1hss_be_r,
4719 gen_helper_sve_ldff1ss_be_r,
4720 gen_helper_sve_ldff1sdu_be_r,
4722 gen_helper_sve_ldff1bds_r,
4723 gen_helper_sve_ldff1bss_r,
4724 gen_helper_sve_ldff1bhs_r,
4725 gen_helper_sve_ldff1dd_be_r },
4728 if (sve_access_check(s)) {
4729 TCGv_i64 addr = new_tmp_a64(s);
4730 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4731 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4732 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4733 fns[s->be_data == MO_BE][a->dtype]);
4735 return true;
4738 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4740 static gen_helper_gvec_mem * const fns[2][16] = {
4741 /* Little-endian */
4742 { gen_helper_sve_ldnf1bb_r,
4743 gen_helper_sve_ldnf1bhu_r,
4744 gen_helper_sve_ldnf1bsu_r,
4745 gen_helper_sve_ldnf1bdu_r,
4747 gen_helper_sve_ldnf1sds_le_r,
4748 gen_helper_sve_ldnf1hh_le_r,
4749 gen_helper_sve_ldnf1hsu_le_r,
4750 gen_helper_sve_ldnf1hdu_le_r,
4752 gen_helper_sve_ldnf1hds_le_r,
4753 gen_helper_sve_ldnf1hss_le_r,
4754 gen_helper_sve_ldnf1ss_le_r,
4755 gen_helper_sve_ldnf1sdu_le_r,
4757 gen_helper_sve_ldnf1bds_r,
4758 gen_helper_sve_ldnf1bss_r,
4759 gen_helper_sve_ldnf1bhs_r,
4760 gen_helper_sve_ldnf1dd_le_r },
4762 /* Big-endian */
4763 { gen_helper_sve_ldnf1bb_r,
4764 gen_helper_sve_ldnf1bhu_r,
4765 gen_helper_sve_ldnf1bsu_r,
4766 gen_helper_sve_ldnf1bdu_r,
4768 gen_helper_sve_ldnf1sds_be_r,
4769 gen_helper_sve_ldnf1hh_be_r,
4770 gen_helper_sve_ldnf1hsu_be_r,
4771 gen_helper_sve_ldnf1hdu_be_r,
4773 gen_helper_sve_ldnf1hds_be_r,
4774 gen_helper_sve_ldnf1hss_be_r,
4775 gen_helper_sve_ldnf1ss_be_r,
4776 gen_helper_sve_ldnf1sdu_be_r,
4778 gen_helper_sve_ldnf1bds_r,
4779 gen_helper_sve_ldnf1bss_r,
4780 gen_helper_sve_ldnf1bhs_r,
4781 gen_helper_sve_ldnf1dd_be_r },
4784 if (sve_access_check(s)) {
4785 int vsz = vec_full_reg_size(s);
4786 int elements = vsz >> dtype_esz[a->dtype];
4787 int off = (a->imm * elements) << dtype_msz(a->dtype);
4788 TCGv_i64 addr = new_tmp_a64(s);
4790 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4791 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4792 fns[s->be_data == MO_BE][a->dtype]);
4794 return true;
4797 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4799 static gen_helper_gvec_mem * const fns[2][4] = {
4800 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4801 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4802 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4803 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4805 unsigned vsz = vec_full_reg_size(s);
4806 TCGv_ptr t_pg;
4807 TCGv_i32 t_desc;
4808 int desc, poff;
4810 /* Load the first quadword using the normal predicated load helpers. */
4811 desc = simd_desc(16, 16, zt);
4812 t_desc = tcg_const_i32(desc);
4814 poff = pred_full_reg_offset(s, pg);
4815 if (vsz > 16) {
4817 * Zero-extend the first 16 bits of the predicate into a temporary.
4818 * This avoids triggering the assert that checks that no predicate
4819 * bits are set beyond VQ, since we have lowered VQ to 1 for this
4820 * load operation.
4822 TCGv_i64 tmp = tcg_temp_new_i64();
4823 #ifdef HOST_WORDS_BIGENDIAN
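/* On a big-endian host, the low 16 predicate bits sit at byte
 * offset 6 within the first 64-bit word of the register.
 */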
4824 poff += 6;
4825 #endif
4826 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4828 poff = offsetof(CPUARMState, vfp.preg_tmp);
4829 tcg_gen_st_i64(tmp, cpu_env, poff);
4830 tcg_temp_free_i64(tmp);
4833 t_pg = tcg_temp_new_ptr();
4834 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4836 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4838 tcg_temp_free_ptr(t_pg);
4839 tcg_temp_free_i32(t_desc);
4841 /* Replicate that first quadword. */
4842 if (vsz > 16) {
4843 unsigned dofs = vec_full_reg_offset(s, zt);
4844 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4848 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4850 if (a->rm == 31) {
4851 return false;
4853 if (sve_access_check(s)) {
4854 int msz = dtype_msz(a->dtype);
4855 TCGv_i64 addr = new_tmp_a64(s);
4856 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4857 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4858 do_ldrq(s, a->rd, a->pg, addr, msz);
4860 return true;
4863 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4865 if (sve_access_check(s)) {
4866 TCGv_i64 addr = new_tmp_a64(s);
4867 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4868 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4870 return true;
4873 /* Load and broadcast element. */
4874 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4876 if (!sve_access_check(s)) {
4877 return true;
4880 unsigned vsz = vec_full_reg_size(s);
4881 unsigned psz = pred_full_reg_size(s);
4882 unsigned esz = dtype_esz[a->dtype];
4883 unsigned msz = dtype_msz(a->dtype);
4884 TCGLabel *over = gen_new_label();
4885 TCGv_i64 temp;
4887 /* If the guarding predicate has no bits set, no load occurs. */
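/* Two test strategies: when the whole predicate fits in one 64-bit
 * word (psz <= 8), AND it with the element mask and branch on zero;
 * otherwise use find_last_active and branch when the result is
 * negative, i.e. no element is active.
 */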
4888 if (psz <= 8) {
4889 /* Narrow the pred_esz_masks value to the actual predicate size,
4890 * simply to reduce the size of the code generated here.
4892 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4893 temp = tcg_temp_new_i64();
4894 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4895 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4896 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4897 tcg_temp_free_i64(temp);
4898 } else {
4899 TCGv_i32 t32 = tcg_temp_new_i32();
4900 find_last_active(s, t32, esz, a->pg);
4901 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4902 tcg_temp_free_i32(t32);
4905 /* Load the data. */
4906 temp = tcg_temp_new_i64();
4907 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4908 tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4909 s->be_data | dtype_mop[a->dtype]);
4911 /* Broadcast to *all* elements. */
4912 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4913 vsz, vsz, temp);
4914 tcg_temp_free_i64(temp);
4916 /* Zero the inactive elements. */
4917 gen_set_label(over);
4918 do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4919 return true;
4922 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4923 int msz, int esz, int nreg)
4925 static gen_helper_gvec_mem * const fn_single[2][4][4] = {
4926 { { gen_helper_sve_st1bb_r,
4927 gen_helper_sve_st1bh_r,
4928 gen_helper_sve_st1bs_r,
4929 gen_helper_sve_st1bd_r },
4930 { NULL,
4931 gen_helper_sve_st1hh_le_r,
4932 gen_helper_sve_st1hs_le_r,
4933 gen_helper_sve_st1hd_le_r },
4934 { NULL, NULL,
4935 gen_helper_sve_st1ss_le_r,
4936 gen_helper_sve_st1sd_le_r },
4937 { NULL, NULL, NULL,
4938 gen_helper_sve_st1dd_le_r } },
4939 { { gen_helper_sve_st1bb_r,
4940 gen_helper_sve_st1bh_r,
4941 gen_helper_sve_st1bs_r,
4942 gen_helper_sve_st1bd_r },
4943 { NULL,
4944 gen_helper_sve_st1hh_be_r,
4945 gen_helper_sve_st1hs_be_r,
4946 gen_helper_sve_st1hd_be_r },
4947 { NULL, NULL,
4948 gen_helper_sve_st1ss_be_r,
4949 gen_helper_sve_st1sd_be_r },
4950 { NULL, NULL, NULL,
4951 gen_helper_sve_st1dd_be_r } },
4953 static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
4954 { { gen_helper_sve_st2bb_r,
4955 gen_helper_sve_st2hh_le_r,
4956 gen_helper_sve_st2ss_le_r,
4957 gen_helper_sve_st2dd_le_r },
4958 { gen_helper_sve_st3bb_r,
4959 gen_helper_sve_st3hh_le_r,
4960 gen_helper_sve_st3ss_le_r,
4961 gen_helper_sve_st3dd_le_r },
4962 { gen_helper_sve_st4bb_r,
4963 gen_helper_sve_st4hh_le_r,
4964 gen_helper_sve_st4ss_le_r,
4965 gen_helper_sve_st4dd_le_r } },
4966 { { gen_helper_sve_st2bb_r,
4967 gen_helper_sve_st2hh_be_r,
4968 gen_helper_sve_st2ss_be_r,
4969 gen_helper_sve_st2dd_be_r },
4970 { gen_helper_sve_st3bb_r,
4971 gen_helper_sve_st3hh_be_r,
4972 gen_helper_sve_st3ss_be_r,
4973 gen_helper_sve_st3dd_be_r },
4974 { gen_helper_sve_st4bb_r,
4975 gen_helper_sve_st4hh_be_r,
4976 gen_helper_sve_st4ss_be_r,
4977 gen_helper_sve_st4dd_be_r } },
4979 gen_helper_gvec_mem *fn;
4980 int be = s->be_data == MO_BE;
4982 if (nreg == 0) {
4983 /* ST1 */
4984 fn = fn_single[be][msz][esz];
4985 } else {
4986 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4987 assert(msz == esz);
4988 fn = fn_multiple[be][nreg - 1][msz];
4990 assert(fn != NULL);
4991 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
4994 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
4996 if (a->rm == 31 || a->msz > a->esz) {
4997 return false;
4999 if (sve_access_check(s)) {
5000 TCGv_i64 addr = new_tmp_a64(s);
5001 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5002 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5003 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5005 return true;
5008 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5010 if (a->msz > a->esz) {
5011 return false;
5013 if (sve_access_check(s)) {
5014 int vsz = vec_full_reg_size(s);
5015 int elements = vsz >> a->esz;
5016 TCGv_i64 addr = new_tmp_a64(s);
5018 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5019 (a->imm * elements * (a->nreg + 1)) << a->msz);
5020 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5022 return true;
5026 *** SVE gather loads / scatter stores
5029 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5030 int scale, TCGv_i64 scalar, int msz,
5031 gen_helper_gvec_mem_scatter *fn)
5033 unsigned vsz = vec_full_reg_size(s);
5034 TCGv_ptr t_zm = tcg_temp_new_ptr();
5035 TCGv_ptr t_pg = tcg_temp_new_ptr();
5036 TCGv_ptr t_zt = tcg_temp_new_ptr();
5037 TCGv_i32 t_desc;
5038 int desc;
5040 desc = simd_desc(vsz, vsz, scale);
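/* The data field of the descriptor carries the scale (0, or msz for
 * scaled offsets); the helper is expected to read it back via
 * simd_data() and form each address as scalar + (offset << scale).
 */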
5041 t_desc = tcg_const_i32(desc);
5043 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5044 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5045 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5046 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5048 tcg_temp_free_ptr(t_zt);
5049 tcg_temp_free_ptr(t_zm);
5050 tcg_temp_free_ptr(t_pg);
5051 tcg_temp_free_i32(t_desc);
5054 /* Indexed by [be][ff][xs][u][msz]. */
5055 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
5056 /* Little-endian */
5057 { { { { gen_helper_sve_ldbss_zsu,
5058 gen_helper_sve_ldhss_le_zsu,
5059 NULL, },
5060 { gen_helper_sve_ldbsu_zsu,
5061 gen_helper_sve_ldhsu_le_zsu,
5062 gen_helper_sve_ldss_le_zsu, } },
5063 { { gen_helper_sve_ldbss_zss,
5064 gen_helper_sve_ldhss_le_zss,
5065 NULL, },
5066 { gen_helper_sve_ldbsu_zss,
5067 gen_helper_sve_ldhsu_le_zss,
5068 gen_helper_sve_ldss_le_zss, } } },
5070 /* First-fault */
5071 { { { gen_helper_sve_ldffbss_zsu,
5072 gen_helper_sve_ldffhss_le_zsu,
5073 NULL, },
5074 { gen_helper_sve_ldffbsu_zsu,
5075 gen_helper_sve_ldffhsu_le_zsu,
5076 gen_helper_sve_ldffss_le_zsu, } },
5077 { { gen_helper_sve_ldffbss_zss,
5078 gen_helper_sve_ldffhss_le_zss,
5079 NULL, },
5080 { gen_helper_sve_ldffbsu_zss,
5081 gen_helper_sve_ldffhsu_le_zss,
5082 gen_helper_sve_ldffss_le_zss, } } } },
5084 /* Big-endian */
5085 { { { { gen_helper_sve_ldbss_zsu,
5086 gen_helper_sve_ldhss_be_zsu,
5087 NULL, },
5088 { gen_helper_sve_ldbsu_zsu,
5089 gen_helper_sve_ldhsu_be_zsu,
5090 gen_helper_sve_ldss_be_zsu, } },
5091 { { gen_helper_sve_ldbss_zss,
5092 gen_helper_sve_ldhss_be_zss,
5093 NULL, },
5094 { gen_helper_sve_ldbsu_zss,
5095 gen_helper_sve_ldhsu_be_zss,
5096 gen_helper_sve_ldss_be_zss, } } },
5098 /* First-fault */
5099 { { { gen_helper_sve_ldffbss_zsu,
5100 gen_helper_sve_ldffhss_be_zsu,
5101 NULL, },
5102 { gen_helper_sve_ldffbsu_zsu,
5103 gen_helper_sve_ldffhsu_be_zsu,
5104 gen_helper_sve_ldffss_be_zsu, } },
5105 { { gen_helper_sve_ldffbss_zss,
5106 gen_helper_sve_ldffhss_be_zss,
5107 NULL, },
5108 { gen_helper_sve_ldffbsu_zss,
5109 gen_helper_sve_ldffhsu_be_zss,
5110 gen_helper_sve_ldffss_be_zss, } } } },
5113 /* Note that we overload xs=2 to indicate 64-bit offset. */
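/* So the xs index reads: 0 = uint32 offsets zero-extended (_zsu),
 * 1 = int32 offsets sign-extended (_zss), 2 = 64-bit offsets (_zd).
 */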
5114 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
5115 /* Little-endian */
5116 { { { { gen_helper_sve_ldbds_zsu,
5117 gen_helper_sve_ldhds_le_zsu,
5118 gen_helper_sve_ldsds_le_zsu,
5119 NULL, },
5120 { gen_helper_sve_ldbdu_zsu,
5121 gen_helper_sve_ldhdu_le_zsu,
5122 gen_helper_sve_ldsdu_le_zsu,
5123 gen_helper_sve_lddd_le_zsu, } },
5124 { { gen_helper_sve_ldbds_zss,
5125 gen_helper_sve_ldhds_le_zss,
5126 gen_helper_sve_ldsds_le_zss,
5127 NULL, },
5128 { gen_helper_sve_ldbdu_zss,
5129 gen_helper_sve_ldhdu_le_zss,
5130 gen_helper_sve_ldsdu_le_zss,
5131 gen_helper_sve_lddd_le_zss, } },
5132 { { gen_helper_sve_ldbds_zd,
5133 gen_helper_sve_ldhds_le_zd,
5134 gen_helper_sve_ldsds_le_zd,
5135 NULL, },
5136 { gen_helper_sve_ldbdu_zd,
5137 gen_helper_sve_ldhdu_le_zd,
5138 gen_helper_sve_ldsdu_le_zd,
5139 gen_helper_sve_lddd_le_zd, } } },
5141 /* First-fault */
5142 { { { gen_helper_sve_ldffbds_zsu,
5143 gen_helper_sve_ldffhds_le_zsu,
5144 gen_helper_sve_ldffsds_le_zsu,
5145 NULL, },
5146 { gen_helper_sve_ldffbdu_zsu,
5147 gen_helper_sve_ldffhdu_le_zsu,
5148 gen_helper_sve_ldffsdu_le_zsu,
5149 gen_helper_sve_ldffdd_le_zsu, } },
5150 { { gen_helper_sve_ldffbds_zss,
5151 gen_helper_sve_ldffhds_le_zss,
5152 gen_helper_sve_ldffsds_le_zss,
5153 NULL, },
5154 { gen_helper_sve_ldffbdu_zss,
5155 gen_helper_sve_ldffhdu_le_zss,
5156 gen_helper_sve_ldffsdu_le_zss,
5157 gen_helper_sve_ldffdd_le_zss, } },
5158 { { gen_helper_sve_ldffbds_zd,
5159 gen_helper_sve_ldffhds_le_zd,
5160 gen_helper_sve_ldffsds_le_zd,
5161 NULL, },
5162 { gen_helper_sve_ldffbdu_zd,
5163 gen_helper_sve_ldffhdu_le_zd,
5164 gen_helper_sve_ldffsdu_le_zd,
5165 gen_helper_sve_ldffdd_le_zd, } } } },
5167 /* Big-endian */
5168 { { { { gen_helper_sve_ldbds_zsu,
5169 gen_helper_sve_ldhds_be_zsu,
5170 gen_helper_sve_ldsds_be_zsu,
5171 NULL, },
5172 { gen_helper_sve_ldbdu_zsu,
5173 gen_helper_sve_ldhdu_be_zsu,
5174 gen_helper_sve_ldsdu_be_zsu,
5175 gen_helper_sve_lddd_be_zsu, } },
5176 { { gen_helper_sve_ldbds_zss,
5177 gen_helper_sve_ldhds_be_zss,
5178 gen_helper_sve_ldsds_be_zss,
5179 NULL, },
5180 { gen_helper_sve_ldbdu_zss,
5181 gen_helper_sve_ldhdu_be_zss,
5182 gen_helper_sve_ldsdu_be_zss,
5183 gen_helper_sve_lddd_be_zss, } },
5184 { { gen_helper_sve_ldbds_zd,
5185 gen_helper_sve_ldhds_be_zd,
5186 gen_helper_sve_ldsds_be_zd,
5187 NULL, },
5188 { gen_helper_sve_ldbdu_zd,
5189 gen_helper_sve_ldhdu_be_zd,
5190 gen_helper_sve_ldsdu_be_zd,
5191 gen_helper_sve_lddd_be_zd, } } },
5193 /* First-fault */
5194 { { { gen_helper_sve_ldffbds_zsu,
5195 gen_helper_sve_ldffhds_be_zsu,
5196 gen_helper_sve_ldffsds_be_zsu,
5197 NULL, },
5198 { gen_helper_sve_ldffbdu_zsu,
5199 gen_helper_sve_ldffhdu_be_zsu,
5200 gen_helper_sve_ldffsdu_be_zsu,
5201 gen_helper_sve_ldffdd_be_zsu, } },
5202 { { gen_helper_sve_ldffbds_zss,
5203 gen_helper_sve_ldffhds_be_zss,
5204 gen_helper_sve_ldffsds_be_zss,
5205 NULL, },
5206 { gen_helper_sve_ldffbdu_zss,
5207 gen_helper_sve_ldffhdu_be_zss,
5208 gen_helper_sve_ldffsdu_be_zss,
5209 gen_helper_sve_ldffdd_be_zss, } },
5210 { { gen_helper_sve_ldffbds_zd,
5211 gen_helper_sve_ldffhds_be_zd,
5212 gen_helper_sve_ldffsds_be_zd,
5213 NULL, },
5214 { gen_helper_sve_ldffbdu_zd,
5215 gen_helper_sve_ldffhdu_be_zd,
5216 gen_helper_sve_ldffsdu_be_zd,
5217 gen_helper_sve_ldffdd_be_zd, } } } },
5220 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5222 gen_helper_gvec_mem_scatter *fn = NULL;
5223 int be = s->be_data == MO_BE;
5225 if (!sve_access_check(s)) {
5226 return true;
5229 switch (a->esz) {
5230 case MO_32:
5231 fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5232 break;
5233 case MO_64:
5234 fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5235 break;
5237 assert(fn != NULL);
5239 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5240 cpu_reg_sp(s, a->rn), a->msz, fn);
5241 return true;
5244 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5246 gen_helper_gvec_mem_scatter *fn = NULL;
5247 int be = s->be_data == MO_BE;
5248 TCGv_i64 imm;
5250 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5251 return false;
5253 if (!sve_access_check(s)) {
5254 return true;
5257 switch (a->esz) {
5258 case MO_32:
5259 fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5260 break;
5261 case MO_64:
5262 fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5263 break;
5265 assert(fn != NULL);
5267 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5268 * by loading the immediate into the scalar parameter.
5270 imm = tcg_const_i64(a->imm << a->msz);
5271 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5272 tcg_temp_free_i64(imm);
5273 return true;
5276 /* Indexed by [be][xs][msz]. */
5277 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
5278 /* Little-endian */
5279 { { gen_helper_sve_stbs_zsu,
5280 gen_helper_sve_sths_le_zsu,
5281 gen_helper_sve_stss_le_zsu, },
5282 { gen_helper_sve_stbs_zss,
5283 gen_helper_sve_sths_le_zss,
5284 gen_helper_sve_stss_le_zss, } },
5285 /* Big-endian */
5286 { { gen_helper_sve_stbs_zsu,
5287 gen_helper_sve_sths_be_zsu,
5288 gen_helper_sve_stss_be_zsu, },
5289 { gen_helper_sve_stbs_zss,
5290 gen_helper_sve_sths_be_zss,
5291 gen_helper_sve_stss_be_zss, } },
5294 /* Note that we overload xs=2 to indicate 64-bit offset. */
5295 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
5296 /* Little-endian */
5297 { { gen_helper_sve_stbd_zsu,
5298 gen_helper_sve_sthd_le_zsu,
5299 gen_helper_sve_stsd_le_zsu,
5300 gen_helper_sve_stdd_le_zsu, },
5301 { gen_helper_sve_stbd_zss,
5302 gen_helper_sve_sthd_le_zss,
5303 gen_helper_sve_stsd_le_zss,
5304 gen_helper_sve_stdd_le_zss, },
5305 { gen_helper_sve_stbd_zd,
5306 gen_helper_sve_sthd_le_zd,
5307 gen_helper_sve_stsd_le_zd,
5308 gen_helper_sve_stdd_le_zd, } },
5309 /* Big-endian */
5310 { { gen_helper_sve_stbd_zsu,
5311 gen_helper_sve_sthd_be_zsu,
5312 gen_helper_sve_stsd_be_zsu,
5313 gen_helper_sve_stdd_be_zsu, },
5314 { gen_helper_sve_stbd_zss,
5315 gen_helper_sve_sthd_be_zss,
5316 gen_helper_sve_stsd_be_zss,
5317 gen_helper_sve_stdd_be_zss, },
5318 { gen_helper_sve_stbd_zd,
5319 gen_helper_sve_sthd_be_zd,
5320 gen_helper_sve_stsd_be_zd,
5321 gen_helper_sve_stdd_be_zd, } },
5324 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5326 gen_helper_gvec_mem_scatter *fn;
5327 int be = s->be_data == MO_BE;
5329 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5330 return false;
5332 if (!sve_access_check(s)) {
5333 return true;
5335 switch (a->esz) {
5336 case MO_32:
5337 fn = scatter_store_fn32[be][a->xs][a->msz];
5338 break;
5339 case MO_64:
5340 fn = scatter_store_fn64[be][a->xs][a->msz];
5341 break;
5342 default:
5343 g_assert_not_reached();
5345 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5346 cpu_reg_sp(s, a->rn), a->msz, fn);
5347 return true;
5350 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5352 gen_helper_gvec_mem_scatter *fn = NULL;
5353 int be = s->be_data == MO_BE;
5354 TCGv_i64 imm;
5356 if (a->esz < a->msz) {
5357 return false;
5359 if (!sve_access_check(s)) {
5360 return true;
5363 switch (a->esz) {
5364 case MO_32:
5365 fn = scatter_store_fn32[be][0][a->msz];
5366 break;
5367 case MO_64:
5368 fn = scatter_store_fn64[be][2][a->msz];
5369 break;
5371 assert(fn != NULL);
5373 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5374 * by loading the immediate into the scalar parameter.
5376 imm = tcg_const_i64(a->imm << a->msz);
5377 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5378 tcg_temp_free_i64(imm);
5379 return true;
5383 * Prefetches
5386 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5388 /* Prefetch is a nop within QEMU. */
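/* The access check still runs so that a disabled-SVE trap is taken. */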
5389 (void)sve_access_check(s);
5390 return true;
5393 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5395 if (a->rm == 31) {
5396 return false;
5398 /* Prefetch is a nop within QEMU. */
5399 (void)sve_access_check(s);
5400 return true;
5404 * Move Prefix
5406 * TODO: The implementation so far could handle predicated merging movprfx.
5407 * The helper functions as written take an extra source register to
5408 * use in the operation, but the result is only written when predication
5409 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5410 * to allow the final write back to the destination to be unconditional.
5411 * For predicated zeroing movprfx, we need to rearrange the helpers to
5412 * allow the final write back to zero inactives.
5414 * In the meantime, just emit the moves.
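 *
 * Concretely: unpredicated MOVPRFX is a plain register copy
 * (do_mov_z); MOVPRFX_m merges, selecting active elements from Zn
 * and keeping Zd elsewhere (do_sel_z); MOVPRFX_z copies the active
 * elements and zeroes the inactive ones (do_movz_zpz).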
5417 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
5419 return do_mov_z(s, a->rd, a->rn);
5422 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5424 if (sve_access_check(s)) {
5425 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5427 return true;
5430 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5432 if (sve_access_check(s)) {
5433 do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5435 return true;