Merge remote-tracking branch 'remotes/bonzini-gitlab/tags/for-upstream' into staging
[qemu/ar7.git] / target / arm / translate-sve.c
blob0eefb6121447da092b83ad426ced91672a9925bc
1 /*
2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "tcg/tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
51 * Helpers for extracting complex instruction fields.
54 /* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
57 static int tszimm_esz(DisasContext *s, int x)
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
63 static int tszimm_shr(DisasContext *s, int x)
65 return (16 << tszimm_esz(s, x)) - x;
68 /* See e.g. LSL (immediate, predicated). */
69 static int tszimm_shl(DisasContext *s, int x)
71 return x - (8 << tszimm_esz(s, x));
74 static inline int plus1(DisasContext *s, int x)
76 return x + 1;
79 /* The SH bit is in bit 8. Extract the low 8 and shift. */
80 static inline int expand_imm_sh8s(DisasContext *s, int x)
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
85 static inline int expand_imm_sh8u(DisasContext *s, int x)
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
93 static inline int msz_dtype(DisasContext *s, int msz)
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
100 * Include the generated decoder.
103 #include "decode-sve.c.inc"
106 * Implement all of the translator functions referenced by the decoder.
109 /* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
114 return offsetof(CPUARMState, vfp.pregs[regno]);
117 /* Return the byte size of the whole predicate register, VL / 64. */
118 static inline int pred_full_reg_size(DisasContext *s)
120 return s->sve_len >> 3;
123 /* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them the same way.
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
131 static int size_for_gvec(int size)
133 if (size <= 8) {
134 return 8;
135 } else {
136 return QEMU_ALIGN_UP(size, 16);
140 static int pred_gvec_reg_size(DisasContext *s)
142 return size_for_gvec(pred_full_reg_size(s));
145 /* Invoke an out-of-line helper on 2 Zregs. */
146 static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
147 int rd, int rn, int data)
149 unsigned vsz = vec_full_reg_size(s);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn),
152 vsz, vsz, data, fn);
155 /* Invoke an out-of-line helper on 3 Zregs. */
156 static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
157 int rd, int rn, int rm, int data)
159 unsigned vsz = vec_full_reg_size(s);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm),
163 vsz, vsz, data, fn);
166 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
167 static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
168 int rd, int rn, int pg, int data)
170 unsigned vsz = vec_full_reg_size(s);
171 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
172 vec_full_reg_offset(s, rn),
173 pred_full_reg_offset(s, pg),
174 vsz, vsz, data, fn);
177 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
178 static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
179 int rd, int rn, int rm, int pg, int data)
181 unsigned vsz = vec_full_reg_size(s);
182 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
183 vec_full_reg_offset(s, rn),
184 vec_full_reg_offset(s, rm),
185 pred_full_reg_offset(s, pg),
186 vsz, vsz, data, fn);
189 /* Invoke a vector expander on two Zregs. */
190 static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
191 int esz, int rd, int rn)
193 unsigned vsz = vec_full_reg_size(s);
194 gvec_fn(esz, vec_full_reg_offset(s, rd),
195 vec_full_reg_offset(s, rn), vsz, vsz);
198 /* Invoke a vector expander on three Zregs. */
199 static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
200 int esz, int rd, int rn, int rm)
202 unsigned vsz = vec_full_reg_size(s);
203 gvec_fn(esz, vec_full_reg_offset(s, rd),
204 vec_full_reg_offset(s, rn),
205 vec_full_reg_offset(s, rm), vsz, vsz);
208 /* Invoke a vector move on two Zregs. */
209 static bool do_mov_z(DisasContext *s, int rd, int rn)
211 if (sve_access_check(s)) {
212 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
214 return true;
217 /* Initialize a Zreg with replications of a 64-bit immediate. */
218 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
220 unsigned vsz = vec_full_reg_size(s);
221 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
224 /* Invoke a vector expander on three Pregs. */
225 static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
226 int rd, int rn, int rm)
228 unsigned psz = pred_gvec_reg_size(s);
229 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
230 pred_full_reg_offset(s, rn),
231 pred_full_reg_offset(s, rm), psz, psz);
234 /* Invoke a vector move on two Pregs. */
235 static bool do_mov_p(DisasContext *s, int rd, int rn)
237 if (sve_access_check(s)) {
238 unsigned psz = pred_gvec_reg_size(s);
239 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
240 pred_full_reg_offset(s, rn), psz, psz);
242 return true;
245 /* Set the cpu flags as per a return from an SVE helper. */
246 static void do_pred_flags(TCGv_i32 t)
248 tcg_gen_mov_i32(cpu_NF, t);
249 tcg_gen_andi_i32(cpu_ZF, t, 2);
250 tcg_gen_andi_i32(cpu_CF, t, 1);
251 tcg_gen_movi_i32(cpu_VF, 0);
254 /* Subroutines computing the ARM PredTest psuedofunction. */
255 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
257 TCGv_i32 t = tcg_temp_new_i32();
259 gen_helper_sve_predtest1(t, d, g);
260 do_pred_flags(t);
261 tcg_temp_free_i32(t);
264 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
266 TCGv_ptr dptr = tcg_temp_new_ptr();
267 TCGv_ptr gptr = tcg_temp_new_ptr();
268 TCGv_i32 t;
270 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
271 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
272 t = tcg_const_i32(words);
274 gen_helper_sve_predtest(t, dptr, gptr, t);
275 tcg_temp_free_ptr(dptr);
276 tcg_temp_free_ptr(gptr);
278 do_pred_flags(t);
279 tcg_temp_free_i32(t);
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
289 *** SVE Logical - Unpredicated Group
292 static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
294 if (sve_access_check(s)) {
295 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
297 return true;
300 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
302 return do_zzz_fn(s, a, tcg_gen_gvec_and);
305 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
307 return do_zzz_fn(s, a, tcg_gen_gvec_or);
310 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
312 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
315 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
317 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
321 *** SVE Integer Arithmetic - Unpredicated Group
324 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
326 return do_zzz_fn(s, a, tcg_gen_gvec_add);
329 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
331 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
334 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
336 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
339 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
341 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
344 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
346 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
349 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
351 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
355 *** SVE Integer Arithmetic - Binary Predicated Group
358 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
360 if (fn == NULL) {
361 return false;
363 if (sve_access_check(s)) {
364 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
366 return true;
369 /* Select active elememnts from Zn and inactive elements from Zm,
370 * storing the result in Zd.
372 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
374 static gen_helper_gvec_4 * const fns[4] = {
375 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
376 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
378 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
381 #define DO_ZPZZ(NAME, name) \
382 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
384 static gen_helper_gvec_4 * const fns[4] = { \
385 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
386 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
387 }; \
388 return do_zpzz_ool(s, a, fns[a->esz]); \
391 DO_ZPZZ(AND, and)
392 DO_ZPZZ(EOR, eor)
393 DO_ZPZZ(ORR, orr)
394 DO_ZPZZ(BIC, bic)
396 DO_ZPZZ(ADD, add)
397 DO_ZPZZ(SUB, sub)
399 DO_ZPZZ(SMAX, smax)
400 DO_ZPZZ(UMAX, umax)
401 DO_ZPZZ(SMIN, smin)
402 DO_ZPZZ(UMIN, umin)
403 DO_ZPZZ(SABD, sabd)
404 DO_ZPZZ(UABD, uabd)
406 DO_ZPZZ(MUL, mul)
407 DO_ZPZZ(SMULH, smulh)
408 DO_ZPZZ(UMULH, umulh)
410 DO_ZPZZ(ASR, asr)
411 DO_ZPZZ(LSR, lsr)
412 DO_ZPZZ(LSL, lsl)
414 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
416 static gen_helper_gvec_4 * const fns[4] = {
417 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
419 return do_zpzz_ool(s, a, fns[a->esz]);
422 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
424 static gen_helper_gvec_4 * const fns[4] = {
425 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
427 return do_zpzz_ool(s, a, fns[a->esz]);
430 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
432 if (sve_access_check(s)) {
433 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
435 return true;
438 #undef DO_ZPZZ
441 *** SVE Integer Arithmetic - Unary Predicated Group
444 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
446 if (fn == NULL) {
447 return false;
449 if (sve_access_check(s)) {
450 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
452 return true;
455 #define DO_ZPZ(NAME, name) \
456 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
458 static gen_helper_gvec_3 * const fns[4] = { \
459 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
460 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
461 }; \
462 return do_zpz_ool(s, a, fns[a->esz]); \
465 DO_ZPZ(CLS, cls)
466 DO_ZPZ(CLZ, clz)
467 DO_ZPZ(CNT_zpz, cnt_zpz)
468 DO_ZPZ(CNOT, cnot)
469 DO_ZPZ(NOT_zpz, not_zpz)
470 DO_ZPZ(ABS, abs)
471 DO_ZPZ(NEG, neg)
473 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
475 static gen_helper_gvec_3 * const fns[4] = {
476 NULL,
477 gen_helper_sve_fabs_h,
478 gen_helper_sve_fabs_s,
479 gen_helper_sve_fabs_d
481 return do_zpz_ool(s, a, fns[a->esz]);
484 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
486 static gen_helper_gvec_3 * const fns[4] = {
487 NULL,
488 gen_helper_sve_fneg_h,
489 gen_helper_sve_fneg_s,
490 gen_helper_sve_fneg_d
492 return do_zpz_ool(s, a, fns[a->esz]);
495 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
497 static gen_helper_gvec_3 * const fns[4] = {
498 NULL,
499 gen_helper_sve_sxtb_h,
500 gen_helper_sve_sxtb_s,
501 gen_helper_sve_sxtb_d
503 return do_zpz_ool(s, a, fns[a->esz]);
506 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
508 static gen_helper_gvec_3 * const fns[4] = {
509 NULL,
510 gen_helper_sve_uxtb_h,
511 gen_helper_sve_uxtb_s,
512 gen_helper_sve_uxtb_d
514 return do_zpz_ool(s, a, fns[a->esz]);
517 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
519 static gen_helper_gvec_3 * const fns[4] = {
520 NULL, NULL,
521 gen_helper_sve_sxth_s,
522 gen_helper_sve_sxth_d
524 return do_zpz_ool(s, a, fns[a->esz]);
527 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
529 static gen_helper_gvec_3 * const fns[4] = {
530 NULL, NULL,
531 gen_helper_sve_uxth_s,
532 gen_helper_sve_uxth_d
534 return do_zpz_ool(s, a, fns[a->esz]);
537 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
542 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
544 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
547 #undef DO_ZPZ
550 *** SVE Integer Reduction Group
553 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
554 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
555 gen_helper_gvec_reduc *fn)
557 unsigned vsz = vec_full_reg_size(s);
558 TCGv_ptr t_zn, t_pg;
559 TCGv_i32 desc;
560 TCGv_i64 temp;
562 if (fn == NULL) {
563 return false;
565 if (!sve_access_check(s)) {
566 return true;
569 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
570 temp = tcg_temp_new_i64();
571 t_zn = tcg_temp_new_ptr();
572 t_pg = tcg_temp_new_ptr();
574 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
575 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
576 fn(temp, t_zn, t_pg, desc);
577 tcg_temp_free_ptr(t_zn);
578 tcg_temp_free_ptr(t_pg);
579 tcg_temp_free_i32(desc);
581 write_fp_dreg(s, a->rd, temp);
582 tcg_temp_free_i64(temp);
583 return true;
586 #define DO_VPZ(NAME, name) \
587 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
589 static gen_helper_gvec_reduc * const fns[4] = { \
590 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
591 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
592 }; \
593 return do_vpz_ool(s, a, fns[a->esz]); \
596 DO_VPZ(ORV, orv)
597 DO_VPZ(ANDV, andv)
598 DO_VPZ(EORV, eorv)
600 DO_VPZ(UADDV, uaddv)
601 DO_VPZ(SMAXV, smaxv)
602 DO_VPZ(UMAXV, umaxv)
603 DO_VPZ(SMINV, sminv)
604 DO_VPZ(UMINV, uminv)
606 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
608 static gen_helper_gvec_reduc * const fns[4] = {
609 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
610 gen_helper_sve_saddv_s, NULL
612 return do_vpz_ool(s, a, fns[a->esz]);
615 #undef DO_VPZ
618 *** SVE Shift by Immediate - Predicated Group
622 * Copy Zn into Zd, storing zeros into inactive elements.
623 * If invert, store zeros into the active elements.
625 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
626 int esz, bool invert)
628 static gen_helper_gvec_3 * const fns[4] = {
629 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
630 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
633 if (sve_access_check(s)) {
634 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
636 return true;
639 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
640 gen_helper_gvec_3 *fn)
642 if (sve_access_check(s)) {
643 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
645 return true;
648 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
650 static gen_helper_gvec_3 * const fns[4] = {
651 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
652 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
654 if (a->esz < 0) {
655 /* Invalid tsz encoding -- see tszimm_esz. */
656 return false;
658 /* Shift by element size is architecturally valid. For
659 arithmetic right-shift, it's the same as by one less. */
660 a->imm = MIN(a->imm, (8 << a->esz) - 1);
661 return do_zpzi_ool(s, a, fns[a->esz]);
664 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
666 static gen_helper_gvec_3 * const fns[4] = {
667 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
668 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
670 if (a->esz < 0) {
671 return false;
673 /* Shift by element size is architecturally valid.
674 For logical shifts, it is a zeroing operation. */
675 if (a->imm >= (8 << a->esz)) {
676 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
677 } else {
678 return do_zpzi_ool(s, a, fns[a->esz]);
682 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
684 static gen_helper_gvec_3 * const fns[4] = {
685 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
686 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
688 if (a->esz < 0) {
689 return false;
691 /* Shift by element size is architecturally valid.
692 For logical shifts, it is a zeroing operation. */
693 if (a->imm >= (8 << a->esz)) {
694 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
695 } else {
696 return do_zpzi_ool(s, a, fns[a->esz]);
700 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
702 static gen_helper_gvec_3 * const fns[4] = {
703 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
704 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
706 if (a->esz < 0) {
707 return false;
709 /* Shift by element size is architecturally valid. For arithmetic
710 right shift for division, it is a zeroing operation. */
711 if (a->imm >= (8 << a->esz)) {
712 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
713 } else {
714 return do_zpzi_ool(s, a, fns[a->esz]);
719 *** SVE Bitwise Shift - Predicated Group
722 #define DO_ZPZW(NAME, name) \
723 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
725 static gen_helper_gvec_4 * const fns[3] = { \
726 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
727 gen_helper_sve_##name##_zpzw_s, \
728 }; \
729 if (a->esz < 0 || a->esz >= 3) { \
730 return false; \
732 return do_zpzz_ool(s, a, fns[a->esz]); \
735 DO_ZPZW(ASR, asr)
736 DO_ZPZW(LSR, lsr)
737 DO_ZPZW(LSL, lsl)
739 #undef DO_ZPZW
742 *** SVE Bitwise Shift - Unpredicated Group
745 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
746 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
747 int64_t, uint32_t, uint32_t))
749 if (a->esz < 0) {
750 /* Invalid tsz encoding -- see tszimm_esz. */
751 return false;
753 if (sve_access_check(s)) {
754 unsigned vsz = vec_full_reg_size(s);
755 /* Shift by element size is architecturally valid. For
756 arithmetic right-shift, it's the same as by one less.
757 Otherwise it is a zeroing operation. */
758 if (a->imm >= 8 << a->esz) {
759 if (asr) {
760 a->imm = (8 << a->esz) - 1;
761 } else {
762 do_dupi_z(s, a->rd, 0);
763 return true;
766 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
767 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
769 return true;
772 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
774 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
777 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
779 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
782 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
784 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
787 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
789 if (fn == NULL) {
790 return false;
792 if (sve_access_check(s)) {
793 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
795 return true;
798 #define DO_ZZW(NAME, name) \
799 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
801 static gen_helper_gvec_3 * const fns[4] = { \
802 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
803 gen_helper_sve_##name##_zzw_s, NULL \
804 }; \
805 return do_zzw_ool(s, a, fns[a->esz]); \
808 DO_ZZW(ASR, asr)
809 DO_ZZW(LSR, lsr)
810 DO_ZZW(LSL, lsl)
812 #undef DO_ZZW
815 *** SVE Integer Multiply-Add Group
818 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
819 gen_helper_gvec_5 *fn)
821 if (sve_access_check(s)) {
822 unsigned vsz = vec_full_reg_size(s);
823 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
824 vec_full_reg_offset(s, a->ra),
825 vec_full_reg_offset(s, a->rn),
826 vec_full_reg_offset(s, a->rm),
827 pred_full_reg_offset(s, a->pg),
828 vsz, vsz, 0, fn);
830 return true;
833 #define DO_ZPZZZ(NAME, name) \
834 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
836 static gen_helper_gvec_5 * const fns[4] = { \
837 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
838 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
839 }; \
840 return do_zpzzz_ool(s, a, fns[a->esz]); \
843 DO_ZPZZZ(MLA, mla)
844 DO_ZPZZZ(MLS, mls)
846 #undef DO_ZPZZZ
849 *** SVE Index Generation Group
852 static void do_index(DisasContext *s, int esz, int rd,
853 TCGv_i64 start, TCGv_i64 incr)
855 unsigned vsz = vec_full_reg_size(s);
856 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
857 TCGv_ptr t_zd = tcg_temp_new_ptr();
859 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
860 if (esz == 3) {
861 gen_helper_sve_index_d(t_zd, start, incr, desc);
862 } else {
863 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
864 static index_fn * const fns[3] = {
865 gen_helper_sve_index_b,
866 gen_helper_sve_index_h,
867 gen_helper_sve_index_s,
869 TCGv_i32 s32 = tcg_temp_new_i32();
870 TCGv_i32 i32 = tcg_temp_new_i32();
872 tcg_gen_extrl_i64_i32(s32, start);
873 tcg_gen_extrl_i64_i32(i32, incr);
874 fns[esz](t_zd, s32, i32, desc);
876 tcg_temp_free_i32(s32);
877 tcg_temp_free_i32(i32);
879 tcg_temp_free_ptr(t_zd);
880 tcg_temp_free_i32(desc);
883 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
885 if (sve_access_check(s)) {
886 TCGv_i64 start = tcg_const_i64(a->imm1);
887 TCGv_i64 incr = tcg_const_i64(a->imm2);
888 do_index(s, a->esz, a->rd, start, incr);
889 tcg_temp_free_i64(start);
890 tcg_temp_free_i64(incr);
892 return true;
895 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
897 if (sve_access_check(s)) {
898 TCGv_i64 start = tcg_const_i64(a->imm);
899 TCGv_i64 incr = cpu_reg(s, a->rm);
900 do_index(s, a->esz, a->rd, start, incr);
901 tcg_temp_free_i64(start);
903 return true;
906 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
908 if (sve_access_check(s)) {
909 TCGv_i64 start = cpu_reg(s, a->rn);
910 TCGv_i64 incr = tcg_const_i64(a->imm);
911 do_index(s, a->esz, a->rd, start, incr);
912 tcg_temp_free_i64(incr);
914 return true;
917 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
919 if (sve_access_check(s)) {
920 TCGv_i64 start = cpu_reg(s, a->rn);
921 TCGv_i64 incr = cpu_reg(s, a->rm);
922 do_index(s, a->esz, a->rd, start, incr);
924 return true;
928 *** SVE Stack Allocation Group
931 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
933 if (sve_access_check(s)) {
934 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
935 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
936 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
938 return true;
941 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
943 if (sve_access_check(s)) {
944 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
945 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
946 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
948 return true;
951 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
953 if (sve_access_check(s)) {
954 TCGv_i64 reg = cpu_reg(s, a->rd);
955 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
957 return true;
961 *** SVE Compute Vector Address Group
964 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
966 if (sve_access_check(s)) {
967 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
969 return true;
972 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
974 return do_adr(s, a, gen_helper_sve_adr_p32);
977 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
979 return do_adr(s, a, gen_helper_sve_adr_p64);
982 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
984 return do_adr(s, a, gen_helper_sve_adr_s32);
987 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
989 return do_adr(s, a, gen_helper_sve_adr_u32);
993 *** SVE Integer Misc - Unpredicated Group
996 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
998 static gen_helper_gvec_2 * const fns[4] = {
999 NULL,
1000 gen_helper_sve_fexpa_h,
1001 gen_helper_sve_fexpa_s,
1002 gen_helper_sve_fexpa_d,
1004 if (a->esz == 0) {
1005 return false;
1007 if (sve_access_check(s)) {
1008 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
1010 return true;
1013 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1015 static gen_helper_gvec_3 * const fns[4] = {
1016 NULL,
1017 gen_helper_sve_ftssel_h,
1018 gen_helper_sve_ftssel_s,
1019 gen_helper_sve_ftssel_d,
1021 if (a->esz == 0) {
1022 return false;
1024 if (sve_access_check(s)) {
1025 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
1027 return true;
1031 *** SVE Predicate Logical Operations Group
1034 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1035 const GVecGen4 *gvec_op)
1037 if (!sve_access_check(s)) {
1038 return true;
1041 unsigned psz = pred_gvec_reg_size(s);
1042 int dofs = pred_full_reg_offset(s, a->rd);
1043 int nofs = pred_full_reg_offset(s, a->rn);
1044 int mofs = pred_full_reg_offset(s, a->rm);
1045 int gofs = pred_full_reg_offset(s, a->pg);
1047 if (!a->s) {
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 return true;
1052 if (psz == 8) {
1053 /* Do the operation and the flags generation in temps. */
1054 TCGv_i64 pd = tcg_temp_new_i64();
1055 TCGv_i64 pn = tcg_temp_new_i64();
1056 TCGv_i64 pm = tcg_temp_new_i64();
1057 TCGv_i64 pg = tcg_temp_new_i64();
1059 tcg_gen_ld_i64(pn, cpu_env, nofs);
1060 tcg_gen_ld_i64(pm, cpu_env, mofs);
1061 tcg_gen_ld_i64(pg, cpu_env, gofs);
1063 gvec_op->fni8(pd, pn, pm, pg);
1064 tcg_gen_st_i64(pd, cpu_env, dofs);
1066 do_predtest1(pd, pg);
1068 tcg_temp_free_i64(pd);
1069 tcg_temp_free_i64(pn);
1070 tcg_temp_free_i64(pm);
1071 tcg_temp_free_i64(pg);
1072 } else {
1073 /* The operation and flags generation is large. The computation
1074 * of the flags depends on the original contents of the guarding
1075 * predicate. If the destination overwrites the guarding predicate,
1076 * then the easiest way to get this right is to save a copy.
1078 int tofs = gofs;
1079 if (a->rd == a->pg) {
1080 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1081 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1084 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1085 do_predtest(s, dofs, tofs, psz / 8);
1087 return true;
1090 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1092 tcg_gen_and_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1096 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1099 tcg_gen_and_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1103 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1105 static const GVecGen4 op = {
1106 .fni8 = gen_and_pg_i64,
1107 .fniv = gen_and_pg_vec,
1108 .fno = gen_helper_sve_and_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1112 if (!a->s) {
1113 if (!sve_access_check(s)) {
1114 return true;
1116 if (a->rn == a->rm) {
1117 if (a->pg == a->rn) {
1118 do_mov_p(s, a->rd, a->rn);
1119 } else {
1120 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1122 return true;
1123 } else if (a->pg == a->rn || a->pg == a->rm) {
1124 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1125 return true;
1128 return do_pppp_flags(s, a, &op);
1131 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1133 tcg_gen_andc_i64(pd, pn, pm);
1134 tcg_gen_and_i64(pd, pd, pg);
1137 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1138 TCGv_vec pm, TCGv_vec pg)
1140 tcg_gen_andc_vec(vece, pd, pn, pm);
1141 tcg_gen_and_vec(vece, pd, pd, pg);
1144 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1146 static const GVecGen4 op = {
1147 .fni8 = gen_bic_pg_i64,
1148 .fniv = gen_bic_pg_vec,
1149 .fno = gen_helper_sve_bic_pppp,
1150 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1153 if (!a->s && a->pg == a->rn) {
1154 if (sve_access_check(s)) {
1155 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1157 return true;
1159 return do_pppp_flags(s, a, &op);
1162 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1164 tcg_gen_xor_i64(pd, pn, pm);
1165 tcg_gen_and_i64(pd, pd, pg);
1168 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1169 TCGv_vec pm, TCGv_vec pg)
1171 tcg_gen_xor_vec(vece, pd, pn, pm);
1172 tcg_gen_and_vec(vece, pd, pd, pg);
1175 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1177 static const GVecGen4 op = {
1178 .fni8 = gen_eor_pg_i64,
1179 .fniv = gen_eor_pg_vec,
1180 .fno = gen_helper_sve_eor_pppp,
1181 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1183 return do_pppp_flags(s, a, &op);
1186 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1188 if (a->s) {
1189 return false;
1191 if (sve_access_check(s)) {
1192 unsigned psz = pred_gvec_reg_size(s);
1193 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1194 pred_full_reg_offset(s, a->pg),
1195 pred_full_reg_offset(s, a->rn),
1196 pred_full_reg_offset(s, a->rm), psz, psz);
1198 return true;
1201 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1203 tcg_gen_or_i64(pd, pn, pm);
1204 tcg_gen_and_i64(pd, pd, pg);
1207 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1208 TCGv_vec pm, TCGv_vec pg)
1210 tcg_gen_or_vec(vece, pd, pn, pm);
1211 tcg_gen_and_vec(vece, pd, pd, pg);
1214 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1216 static const GVecGen4 op = {
1217 .fni8 = gen_orr_pg_i64,
1218 .fniv = gen_orr_pg_vec,
1219 .fno = gen_helper_sve_orr_pppp,
1220 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1223 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1224 return do_mov_p(s, a->rd, a->rn);
1226 return do_pppp_flags(s, a, &op);
1229 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1231 tcg_gen_orc_i64(pd, pn, pm);
1232 tcg_gen_and_i64(pd, pd, pg);
1235 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1236 TCGv_vec pm, TCGv_vec pg)
1238 tcg_gen_orc_vec(vece, pd, pn, pm);
1239 tcg_gen_and_vec(vece, pd, pd, pg);
1242 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1244 static const GVecGen4 op = {
1245 .fni8 = gen_orn_pg_i64,
1246 .fniv = gen_orn_pg_vec,
1247 .fno = gen_helper_sve_orn_pppp,
1248 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1250 return do_pppp_flags(s, a, &op);
/* 64-bit-chunk expander for predicated NOR: pd = pg & ~(pn | pm). */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Host-vector expander for predicated NOR. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
/* 64-bit-chunk expander for predicated NAND: pd = pg & ~(pn & pm). */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Host-vector expander for predicated NAND. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
/*
 *** SVE Predicate Misc Group
 */
/* PTEST: set NZCV from a test of Pn under governing predicate Pg.
 * A predicate that fits in one 64-bit word is tested inline;
 * otherwise fall back to the multi-word do_predtest expansion.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        /* Number of 64-bit words covering the predicate register. */
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.
 *
 * Map a predicate-constraint PATTERN to the number of active elements
 * for a vector of FULLSZ bytes with element size log2 ESZ.  Patterns
 * that name a fixed count yield that count only if it fits, else 0;
 * unallocated patterns yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2: largest power of 2 that fits. */
        return pow2floor(elements);
    }
    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the count. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256. */
        bound = 16u << (pattern - 0x9);
    } else if (pattern == 0x1d) {
        /* MUL4: largest multiple of 4. */
        return elements - elements % 4;
    } else if (pattern == 0x1e) {
        /* MUL3: largest multiple of 3. */
        return elements - elements % 3;
    } else if (pattern == 0x1f) {
        /* ALL */
        return elements;
    } else {
        /* #uimm5: unallocated pattern. */
        return 0;
    }

    /* Fixed-count patterns are valid only when the count fits. */
    return elements >= bound ? bound : 0;
}
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* The final word is only partially set. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word: a single store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* All stored words are identical: use a gvec splat when the
         * set size is representable as a gvec operation size.
         */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Fall back to explicit per-word stores; sizes in bytes from here. */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        /* Zero the remainder of the register. */
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): Pd = FFR, a plain predicate move. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: FFR = Pn, a plain predicate move. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
/* Common expansion for PFIRST and PNEXT: call the out-of-line helper
 * with pointers to Pd and Pn/Pg, then update NZCV from the helper's
 * returned flags value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* T carries DESC in and the NZCV flags result out. */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisions we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        /* Subtracting a positive value can only underflow. */
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        /* Adding a positive value can only overflow. */
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
/* Similarly with 64-bit values.  Here overflow cannot be detected by
 * widening, so use the classic xor-based signed-overflow tests and a
 * movcond to clamp to the saturation bound.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: saturate to 0 when reg < val. */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add: saturate to UINT64_MAX when the sum wraps. */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
/* Similarly with a vector and a scalar operand.  Dispatch to the
 * per-element-size saturating helpers; subtraction is implemented by
 * negating the (positive) scalar and using the add helper.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            /* Unsigned 64-bit has a dedicated subtract helper, since
             * negating the scalar would not fit the unsigned range.
             */
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
1691 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1693 if (sve_access_check(s)) {
1694 unsigned fullsz = vec_full_reg_size(s);
1695 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1696 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1698 return true;
/* INCB/DECB etc (scalar): add or subtract count * imm from Xd.
 * The count is a translation-time constant, so this folds to a
 * single immediate add.
 */
static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 32-bit): saturating inc/dec of Wd,
 * with the result sign- or zero-extended into Xd.
 */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        /* Nothing to add; still perform the required extension. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 64-bit): saturating inc/dec of Xd. */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* inc == 0 leaves the register unchanged; no extension needed here. */
    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
/* INC/DEC (vector): add or subtract count * imm to each element of Zd.
 * esz == 0 (byte elements) is an unallocated encoding for this form.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        /* inc == 0 degenerates to a vector move.
         * NOTE(review): this path has no explicit sve_access_check here;
         * presumably do_mov_z performs the check itself -- confirm.
         */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/* SQINC/UQINC/SQDEC/UQDEC (vector): saturating per-element inc/dec.
 * esz == 0 (byte elements) is an unallocated encoding for this form.
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        /* inc == 0 degenerates to a vector move.
         * NOTE(review): no explicit sve_access_check on this path;
         * presumably do_mov_z performs the check itself -- confirm.
         */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Bitwise Immediate Group
 */
/* Common expansion for AND/ORR/EOR (vector, bitmask immediate):
 * decode the logical-immediate bitmask from the 13-bit dbm field,
 * rejecting invalid encodings, then apply GVEC_FN with the immediate.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
/* DUPM: broadcast a decoded logical-immediate bitmask into Zd. */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).  VAL is the 64-bit value
 * to copy into active elements of Zd; inactive elements take Zn.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
/* FCPY: merging copy of an expanded 8-bit FP immediate.
 * esz == 0 (byte elements) is an unallocated encoding.
 */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/* CPY (immediate, merging).
 * NOTE(review): insn bit 13 is the shift (sh) bit; with byte elements
 * a shifted immediate is an unallocated encoding -- confirm against
 * the decode of this form.
 */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/* CPY (immediate, zeroing): active elements take the immediate,
 * inactive elements are zeroed.  See trans_CPY_m_i regarding the
 * insn bit 13 check for byte elements.
 */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/*
 *** SVE Permute Extract Group
 */
/* EXT: extract a vector from the concatenation Zm:Zn starting at byte
 * offset imm.  An out-of-range imm selects Zn unchanged.
 */
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
/*
 *** SVE Permute - Unpredicated Group
 */
/* DUP (scalar): broadcast Xn/SP into every element of Zd. */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}
/* DUP (indexed): broadcast element [index] of Zn into Zd.  The imm
 * field encodes both the element size (position of lowest set bit)
 * and the index (bits above it); imm with no bit in [4:0] set is an
 * unallocated encoding.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
/* Common expansion for INSR: shift Zn up one element and insert VAL
 * at element 0, via the per-element-size out-of-line helper.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}
/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert Xm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
/* REV (vector): reverse the order of elements within Zn. */
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
    }
    return true;
}

/* TBL: table lookup of Zn elements indexed by Zm. */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}
/* SUNPKLO/SUNPKHI/UUNPKLO/UUNPKHI: widen the low (h == 0) or high
 * (h == 1) half of Zn, signed (u == 0) or unsigned (u == 1).
 * esz is the DESTINATION element size; esz == 0 has no narrower
 * source and is unallocated (NULL row in the table).
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The high half is selected by offsetting the source by vsz/2. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
/*
 *** SVE Permute - Predicates Group
 */
/* Common expansion for three-predicate permutes (ZIP/UZP/TRN on
 * predicates).  HIGH_ODD selects the high (ZIP2) or odd (UZP2/TRN2)
 * variant and is passed to the helper in the descriptor's DATA field.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
/* Common expansion for two-predicate permutes (REV_p, PUNPKLO/HI).
 * HIGH_ODD is passed to the helper in the descriptor's DATA field.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
/* Predicate permute wrappers: the second argument selects the
 * low/even (0) or high/odd (1) variant of the shared helper.
 */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
/*
 *** SVE Permute - Interleaving Group
 */
/* ZIP1/ZIP2 (vectors): interleave elements from the low (HIGH == false)
 * or high (HIGH == true) halves of Zn and Zm, selected by offsetting
 * both source operands by half the vector size.
 */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/* Out-of-line three-vector expansion with an immediate DATA operand. */
static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

/* UZP1/UZP2: the DATA operand is the byte offset of the first element
 * taken (0 for even, one element size for odd).
 */
static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
/*
 *** SVE Permute Vector - Predicated Group
 */
/* COMPACT: pack active elements of Zn to the low end of Zd.
 * Only word and doubleword element sizes are allocated.
 */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-2 size: wrap with a mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise wrap with a compare-and-select against vsz. */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Power-of-2 size: masking -1 yields vsz - 1, i.e. the last byte
         * offset of the last element for this esz's scaled encoding.
         */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
/* Load an unsigned element of ESZ from BASE+OFS.  Returns a new
 * temporary that the caller must free.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
/* Load an unsigned element of ESZ from RM[LAST].  LAST is a byte
 * offset into the vector register; it may be clobbered here (for the
 * big-endian adjustment).  Returns a new temporary the caller frees.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
/* Compute CLAST for a Zreg.  BEFORE selects CLASTB (use the last
 * active element itself) vs CLASTA (the element after it).  When no
 * element is active, Zd takes Zn unchanged.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* A local temp survives the branch emitted below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
/* Compute CLAST for a scalar.  REG_VAL holds the prior value and
 * receives the result; it is left unchanged when no element is active.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
/* Compute CLAST for a Vreg: load the prior value from Vd element 0,
 * run the scalar CLAST, and write back zero-extended to the full Vd.
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
/* Compute CLAST for a Xreg.
 * The current value of RD, narrowed to the element width, is the
 * default result when no element is active.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Zero-extend RD to the element size in place; it serves as both
     * the default value and the destination for do_clast_scalar.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* 64-bit elements: no narrowing needed.  */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2550 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2552 return do_clast_general(s, a, false);
2555 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2557 return do_clast_general(s, a, true);
/* Compute LAST for a scalar.
 * Returns a new i64 temp holding the selected element of zreg RM:
 * the last active element (BEFORE), or the one following it (!BEFORE).
 * NOTE(review): wrap_last_active/incr_last_active are assumed to keep
 * the index within the vector (wrapping) so the load is always valid.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
/* Compute LAST for a Vreg: write the selected element to Vreg RD.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}
2590 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2592 return do_last_fp(s, a, false);
2595 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2597 return do_last_fp(s, a, true);
/* Compute LAST for a Xreg: move the selected element into Xreg RD.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}
2611 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2613 return do_last_general(s, a, false);
2616 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2618 return do_last_general(s, a, true);
2621 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2623 if (sve_access_check(s)) {
2624 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2626 return true;
2629 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2631 if (sve_access_check(s)) {
2632 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2633 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2634 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2635 tcg_temp_free_i64(t);
2637 return true;
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    /* Byte-reverse within each element; byte elements (esz 0) have
     * no helper and so decode as unallocated via do_zpz_ool(NULL).
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    /* Halfword-reverse within each element; only word and doubleword
     * element sizes are allocated.
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    /* Word-reverse within each element; only doubleword is allocated.  */
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    /* Bit-reverse within each element; all element sizes allocated.  */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2678 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2680 if (sve_access_check(s)) {
2681 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
2682 a->rd, a->rn, a->rm, a->pg, a->esz);
2684 return true;
/*
 *** SVE Integer Compare - Vectors Group
 */

/* Common expansion for predicate-producing vector compares.
 * GEN_FN writes predicate RD from comparing ZN with ZM under PG and
 * returns a flags value; NULL means the encoding is unallocated.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* t carries the descriptor in and the flags result back out.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* Update the CPU flags from the helper's result.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
/* Expand a vector-vs-vector integer compare for all four element sizes.  */
#define DO_PPZZ(NAME, name)                                               \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
/* Expand a vector-vs-wide-element compare.  The doubleword slot is NULL:
 * wide-element forms only exist for byte/half/word element sizes.
 */
#define DO_PPZW(NAME, name)                                               \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
/*
 *** SVE Integer Compare - Immediate Groups
 */

/* Common expansion for vector-vs-immediate compares.  The immediate is
 * passed to the helper through the simd_desc data field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* t carries the descriptor (including a->imm) in and flags out.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    /* Update the CPU flags from the helper's result.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
/* Expand a vector-vs-immediate compare for all four element sizes.  */
#define DO_PPZI(NAME, name)                                               \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
/*
 *** SVE Partition Break Group
 */

/* Common expansion for three-predicate break insns (BRKPA/BRKPB).
 * FN is the plain form; FN_S the flag-setting form, used when a->s.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting variant: t returns the flags result.  */
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
/* Common expansion for two-predicate break insns (BRKA/BRKB/BRKN).
 * FN is the plain form; FN_S the flag-setting form, used when a->s.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting variant: t returns the flags result.  */
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
/* BRKPA: break after first true, propagating; helper pair selects
 * the flag-setting form when a->s is set (inside do_brk3).
 */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

/* BRKPB: break before first true, propagating.  */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

/* BRKA, merging form.  */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

/* BRKB, merging form.  */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

/* BRKA, zeroing form.  */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

/* BRKB, zeroing form.  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

/* BRKN: propagate break to next predicate.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
/*
 *** SVE Predicate Count Group
 */

/* Count into VAL the active elements of predicate PN, as governed by
 * predicate PG, at element size ESZ.  Small predicates (<= 8 bytes)
 * are counted inline with a popcount; larger ones go out-of-line.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Restrict PN to the bits that are active in PG.  */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;
        TCGv_i32 t_desc;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
2987 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
2989 if (sve_access_check(s)) {
2990 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2992 return true;
2995 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
2997 if (sve_access_check(s)) {
2998 TCGv_i64 reg = cpu_reg(s, a->rd);
2999 TCGv_i64 val = tcg_temp_new_i64();
3001 do_cntp(s, val, a->esz, a->pg, a->pg);
3002 if (a->d) {
3003 tcg_gen_sub_i64(reg, reg, val);
3004 } else {
3005 tcg_gen_add_i64(reg, reg, val);
3007 tcg_temp_free_i64(val);
3009 return true;
3012 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3014 if (a->esz == 0) {
3015 return false;
3017 if (sve_access_check(s)) {
3018 unsigned vsz = vec_full_reg_size(s);
3019 TCGv_i64 val = tcg_temp_new_i64();
3020 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3022 do_cntp(s, val, a->esz, a->pg, a->pg);
3023 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3024 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3026 return true;
3029 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3031 if (sve_access_check(s)) {
3032 TCGv_i64 reg = cpu_reg(s, a->rd);
3033 TCGv_i64 val = tcg_temp_new_i64();
3035 do_cntp(s, val, a->esz, a->pg, a->pg);
3036 do_sat_addsub_32(reg, val, a->u, a->d);
3038 return true;
3041 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3043 if (sve_access_check(s)) {
3044 TCGv_i64 reg = cpu_reg(s, a->rd);
3045 TCGv_i64 val = tcg_temp_new_i64();
3047 do_cntp(s, val, a->esz, a->pg, a->pg);
3048 do_sat_addsub_64(reg, val, a->u, a->d);
3050 return true;
3053 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3055 if (a->esz == 0) {
3056 return false;
3058 if (sve_access_check(s)) {
3059 TCGv_i64 val = tcg_temp_new_i64();
3060 do_cntp(s, val, a->esz, a->pg, a->pg);
3061 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3063 return true;
/*
 *** SVE Integer Compare Scalars Group
 */

/* CTERMEQ/CTERMNE: compare two scalars and set N and V for the
 * loop-termination test; C is left unchanged.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF = (rn cond rm), computed first as a 0/1 value.  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
/* WHILELT/WHILELE/WHILELO/WHILELS: construct a predicate whose first K
 * elements are true, where K is how many times "compare, then increment
 * op0" would succeed before reaching op1.  The count is computed inline
 * and the predicate itself is filled in by an out-of-line helper.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;

    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit form: extend operands per the signedness of the test.  */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    tcg_gen_sub_i64(t0, op1, op0);

    tmax = tcg_const_i64(vsz >> a->esz);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /* If op1 is max (un)signed integer (and the only time the addition
         * above could overflow), then we produce an all-true predicate by
         * setting the count to the vector length.  This is because the
         * pseudocode is described as an increment + compare loop, and the
         * max integer would always compare true.
         */
        tcg_gen_movi_i64(t1, (a->sf
                              ? (a->u ? UINT64_MAX : INT64_MAX)
                              : (a->u ? UINT32_MAX : INT32_MAX)));
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* The helper fills in the predicate and returns the flags value.  */
    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

/* FDUP: broadcast a VFP-encoded immediate; byte elements unallocated.  */
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate.  */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}
/* DUP (immediate): broadcast an integer immediate.  */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    /* Insn bit 13 is the immediate-shift bit; a shifted immediate with
     * byte elements is an unallocated encoding (per the SVE encoding).
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}
/* ADD (vector, immediate).  */
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* Shifted immediate with byte elements is unallocated (insn bit 13
     * is the immediate-shift bit).
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

/* SUB (vector, immediate): implemented as ADD of the negated immediate.  */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
/* SUBR (vector, immediate): reversed subtract, rd = imm - rn.
 * scalar_first in each GVecGen2s makes the immediate the minuend.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Shifted immediate with byte elements is unallocated.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3280 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3282 if (sve_access_check(s)) {
3283 unsigned vsz = vec_full_reg_size(s);
3284 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3285 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3287 return true;
/* Common expansion for saturating add/sub of an immediate.
 * U selects unsigned saturation, D selects subtraction.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    /* Shifted immediate with byte elements is unallocated.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_const_i64(a->imm);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
/* Common expansion for out-of-line vector-vs-immediate operations.  */
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);

        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            c, vsz, vsz, 0, fn);
        tcg_temp_free_i64(c);
    }
    return true;
}

/* Expand min/max-with-immediate for all four element sizes.  */
#define DO_ZZI(NAME, name)                                              \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
/* SDOT/UDOT (vectors): table indexed by [unsigned][size].  */
static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
{
    static gen_helper_gvec_3 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}

/* SDOT/UDOT (indexed): the element index is passed via the desc data.  */
static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
{
    static gen_helper_gvec_3 * const fns[2][2] = {
        { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
        { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
    }
    return true;
}
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

/* FMLA/FMLS (indexed): the index and the sub (FMLS) flag are packed
 * into the desc data as (index << 1) | sub.
 */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | a->sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Multiply Indexed Group
 */

/* FMUL (indexed): the element index is passed via the desc data.  */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/* Common expansion for predicated FP reductions into a Vreg.
 * The helper receives pow2ceil(vsz) via the desc data — presumably to
 * size its pairwise reduction; verify against the helper definitions.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
/* Expand an FP reduction for half/single/double; esz 0 unallocated.  */
#define DO_VPZ(NAME, name)                                               \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

/* Common expansion for unpredicated FP unary ops via gvec helper.  */
static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
/* FRECPE: reciprocal estimate; byte elements unallocated.  */
static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frecpe_h,
        gen_helper_gvec_frecpe_s,
        gen_helper_gvec_frecpe_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}

/* FRSQRTE: reciprocal square root estimate; byte elements unallocated.  */
static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frsqrte_h,
        gen_helper_gvec_frsqrte_s,
        gen_helper_gvec_frsqrte_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}
/*
 *** SVE Floating Point Compare with Zero Group
 */

/* Common expansion for FP compare-with-zero: predicate RD from
 * comparing vector RN under predicate PG.
 */
static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
/* Expand an FP compare-with-zero for half/single/double.  */
#define DO_PPZ(NAME, name)                                               \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_3_ptr * const fns[3] = {                      \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_ppz_fp(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
/*
 *** SVE floating-point trig multiply-add coefficient
 */

/* FTMAD: the 3-bit coefficient index a->imm goes via the desc data.  */
static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Accumulating Reduction Group
 */

/* FADDA: ordered accumulation of the active elements of RM into the
 * scalar taken from element 0 of RN, written back to Vreg RD.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Initial accumulator value is element 0 of RN.  */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

/* Common expansion for unpredicated FP binary ops; NULL fn means the
 * encoding is unallocated.
 */
static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* Expand an unpredicated FP binary op; byte elements unallocated.  */
#define DO_FP3(NAME, name)                                              \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)               \
{                                                                       \
    static gen_helper_gvec_3_ptr * const fns[4] = {                     \
        NULL, gen_helper_gvec_##name##_h,                               \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d          \
    };                                                                  \
    return do_zzz_fp(s, a, fns[a->esz]);                                \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

/* Common expansion for predicated FP binary ops; NULL fn means the
 * encoding is unallocated.
 */
static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                       gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* Expand a predicated FP binary op; byte elements unallocated.  */
#define DO_FP3(NAME, name)                                              \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)              \
{                                                                       \
    static gen_helper_gvec_4_ptr * const fns[4] = {                     \
        NULL, gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d            \
    };                                                                  \
    return do_zpzz_fp(s, a, fns[a->esz]);                               \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/* Common expansion for predicated vector-vs-scalar FP operations:
 * ZD = op(ZN, SCALAR) under predicate PG, via out-of-line helper FN.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
/* As do_fp_scalar, but with the scalar supplied as an immediate bit pattern. */
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    TCGv_i64 temp = tcg_const_i64(imm);
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
    tcg_temp_free_i64(temp);
}
/*
 * Expand one FP immediate insn.  The 1-bit immediate selects between two
 * architecturally fixed constants (CONST0/CONST1, e.g. 0.5 vs 1.0), looked
 * up here in the encoding appropriate to the element size.  ESZ == 0
 * (byte) is unallocated.
 */
#define DO_FP_IMM(NAME, name, const0, const1)                           \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)       \
{                                                                       \
    static gen_helper_sve_fp2scalar * const fns[3] = {                  \
        gen_helper_sve_##name##_h,                                      \
        gen_helper_sve_##name##_s,                                      \
        gen_helper_sve_##name##_d                                       \
    };                                                                  \
    static uint64_t const val[3][2] = {                                 \
        { float16_##const0, float16_##const1 },                         \
        { float32_##const0, float32_##const1 },                         \
        { float64_##const0, float64_##const1 },                         \
    };                                                                  \
    if (a->esz == 0) {                                                  \
        return false;                                                   \
    }                                                                   \
    if (sve_access_check(s)) {                                          \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);      \
    }                                                                   \
    return true;                                                        \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3808 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3809 gen_helper_gvec_4_ptr *fn)
3811 if (fn == NULL) {
3812 return false;
3814 if (sve_access_check(s)) {
3815 unsigned vsz = vec_full_reg_size(s);
3816 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3817 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3818 vec_full_reg_offset(s, a->rn),
3819 vec_full_reg_offset(s, a->rm),
3820 pred_full_reg_offset(s, a->pg),
3821 status, vsz, vsz, 0, fn);
3822 tcg_temp_free_ptr(status);
3824 return true;
/*
 * Expand one predicated FP compare insn; helper selected by element size
 * (byte is unallocated for FP, hence the NULL at index 0).
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)   \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fp_cmp(s, a, fns[a->esz]);                            \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
/*
 * FCADD: floating-point complex add with rotate.  The rotation selector
 * is forwarded to the helper in the descriptor's data field.
 */
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    /* Byte element size is unallocated. */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * Expand a predicated fused multiply-add family insn
 * (Zd = Za +/- Zn * Zm variants), governed by Pg.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
                    gen_helper_gvec_5_ptr *fn)
{
    /* Byte element size is unallocated for FP. */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* Expand one fused multiply-add insn; helper selected by element size. */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)         \
{                                                                   \
    static gen_helper_gvec_5_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fmla(s, a, fns[a->esz]);                              \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
/*
 * FCMLA (vectors, predicated): complex multiply-add with rotate.
 * The rotation selector travels in the descriptor's data field.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    /* Byte element size is unallocated. */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * FCMLA (indexed): complex multiply-add with one operand selected by
 * index.  Only H and S element sizes are allocated; the decode has
 * already restricted esz and forced rd == ra, which we assert.
 * Index and rotation are packed together into the descriptor data.
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

/*
 * Expand a predicated unary FP operation (Zd = op(Zn), governed by Pg)
 * via an out-of-line helper.  IS_FP16 selects the FP16 status/flags set.
 */
static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * FCVT: floating-point precision conversions.  The suffix names the
 * source and destination sizes (h=half, s=single, d=double); none of
 * these operate purely in FP16, so the fp16 status set is not used.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
/*
 * FCVTZS/FCVTZU: FP to signed/unsigned integer, round toward zero.
 * The suffix names source FP size and destination integer size.
 * Conversions from half-precision pass is_fp16 = true so the helper
 * uses the FP16 status/flags set.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
/* Round-to-integral helpers, indexed by esz - 1; shared with do_frint_mode. */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral using the current FPCR rounding mode. */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
/* FRINTX: round to integral, current mode, raising Inexact. */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
/*
 * Expand an FRINT* insn with an explicit rounding mode: install MODE in
 * the FP status, perform the round-to-integral, then restore the previous
 * mode.  gen_helper_set_rmode writes the old mode back into its first
 * argument, so calling it twice with the same temp saves and restores.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        /* Restore the rounding mode that was saved above. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* FRINT{N,P,M,Z,A}: round to integral with a fixed rounding mode. */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_up);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_down);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
/* FRECPX: floating-point reciprocal exponent. */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
/* FSQRT: floating-point square root, predicated. */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
/*
 * SCVTF: signed integer to floating-point.  Suffix names source integer
 * size and destination FP size; half-precision destinations use the
 * FP16 status/flags set.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}
/* UCVTF: unsigned integer to floating-point; mirrors the SCVTF set. */
static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/*
 * Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte pieces, counting a sub-8-byte tail as one piece. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: fully unrolled sequence of 8-byte loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large transfer: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) yields the MO_SIZE for 2/4/8 bytes. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load followed by a 2-byte load, merged. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
/* Similarly for stores.  */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte pieces, counting a sub-8-byte tail as one piece. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: fully unrolled sequence of 8-byte stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large transfer: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) yields the MO_SIZE for 2/4/8 bytes. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store, then the high 16 bits as 2 bytes. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
4429 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4431 if (sve_access_check(s)) {
4432 int size = vec_full_reg_size(s);
4433 int off = vec_full_reg_offset(s, a->rd);
4434 do_ldr(s, off, size, a->rn, a->imm * size);
4436 return true;
4439 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4441 if (sve_access_check(s)) {
4442 int size = pred_full_reg_size(s);
4443 int off = pred_full_reg_offset(s, a->rd);
4444 do_ldr(s, off, size, a->rn, a->imm * size);
4446 return true;
4449 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4451 if (sve_access_check(s)) {
4452 int size = vec_full_reg_size(s);
4453 int off = vec_full_reg_offset(s, a->rd);
4454 do_str(s, off, size, a->rn, a->imm * size);
4456 return true;
4459 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4461 if (sve_access_check(s)) {
4462 int size = pred_full_reg_size(s);
4463 int off = pred_full_reg_offset(s, a->rd);
4464 do_str(s, off, size, a->rn, a->imm * size);
4466 return true;
/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype.  */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* Extract just the memory access size from the dtype's MemOp. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
/*
 * Common expansion for predicated contiguous loads/stores: build the
 * simd/MTE descriptor and invoke the out-of-line helper FN with the
 * governing predicate and the (possibly TBI-cleaned) address.
 * MTE_N is the number of registers transferred (for the MTE tag-check
 * transfer size); IS_WRITE distinguishes store from load for MTE.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
        desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* Without MTE, strip the TBI byte here rather than in the helper. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
/*
 * Expand a predicated contiguous LD[1-4] by selecting the helper from a
 * table indexed by [mte active][big endian][dtype][nreg].  NREG is the
 * number of registers minus one (0 for LD1 .. 3 for LD4).
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][2][16][4] = {
        { /* mte inactive, little-endian */
          { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
              gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
            { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

            { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
              gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
            { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

            { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
              gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
            { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

            { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
              gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

          /* mte inactive, big-endian */
          { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
              gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
            { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

            { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
              gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
            { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

            { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
              gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
            { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

            { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
            { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
              gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

        { /* mte active, little-endian */
          { { gen_helper_sve_ld1bb_r_mte,
              gen_helper_sve_ld2bb_r_mte,
              gen_helper_sve_ld3bb_r_mte,
              gen_helper_sve_ld4bb_r_mte },
            { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

            { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1hh_le_r_mte,
              gen_helper_sve_ld2hh_le_r_mte,
              gen_helper_sve_ld3hh_le_r_mte,
              gen_helper_sve_ld4hh_le_r_mte },
            { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

            { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1ss_le_r_mte,
              gen_helper_sve_ld2ss_le_r_mte,
              gen_helper_sve_ld3ss_le_r_mte,
              gen_helper_sve_ld4ss_le_r_mte },
            { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

            { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1dd_le_r_mte,
              gen_helper_sve_ld2dd_le_r_mte,
              gen_helper_sve_ld3dd_le_r_mte,
              gen_helper_sve_ld4dd_le_r_mte } },

          /* mte active, big-endian */
          { { gen_helper_sve_ld1bb_r_mte,
              gen_helper_sve_ld2bb_r_mte,
              gen_helper_sve_ld3bb_r_mte,
              gen_helper_sve_ld4bb_r_mte },
            { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

            { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1hh_be_r_mte,
              gen_helper_sve_ld2hh_be_r_mte,
              gen_helper_sve_ld3hh_be_r_mte,
              gen_helper_sve_ld4hh_be_r_mte },
            { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

            { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1ss_be_r_mte,
              gen_helper_sve_ld2ss_be_r_mte,
              gen_helper_sve_ld3ss_be_r_mte,
              gen_helper_sve_ld4ss_be_r_mte },
            { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

            { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
            { gen_helper_sve_ld1dd_be_r_mte,
              gen_helper_sve_ld2dd_be_r_mte,
              gen_helper_sve_ld3dd_be_r_mte,
              gen_helper_sve_ld4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn
        = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}
4661 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4663 if (a->rm == 31) {
4664 return false;
4666 if (sve_access_check(s)) {
4667 TCGv_i64 addr = new_tmp_a64(s);
4668 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4669 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4670 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4672 return true;
4675 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4677 if (sve_access_check(s)) {
4678 int vsz = vec_full_reg_size(s);
4679 int elements = vsz >> dtype_esz[a->dtype];
4680 TCGv_i64 addr = new_tmp_a64(s);
4682 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4683 (a->imm * elements * (a->nreg + 1))
4684 << dtype_msz(a->dtype));
4685 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4687 return true;
/*
 * LDFF1 (scalar plus scalar): first-fault contiguous load.  Helper is
 * selected by [mte active][big endian][dtype]; the address is
 * Rn + (Rm << msz).
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
/*
 * LDNF1 (scalar plus immediate): non-fault contiguous load.  Helper is
 * selected by [mte active][big endian][dtype]; imm is scaled by the
 * vector transfer size.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
/*
 * Load a 16-byte quadword under predicate control and replicate it
 * across the whole vector register (LD1RQ).  msz selects the memory
 * element size used by the predicated load helper.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    /* Indexed by [be][msz]; the byte helper is endian-agnostic. */
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers.
     * The descriptor claims a 16-byte vector, i.e. VQ == 1.  */
    desc = simd_desc(16, 16, zt);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* On a big-endian host the low 16 predicate bits sit at the
         * high-address end of the 8-byte slot.  */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Park the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
4940 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4942 if (a->rm == 31) {
4943 return false;
4945 if (sve_access_check(s)) {
4946 int msz = dtype_msz(a->dtype);
4947 TCGv_i64 addr = new_tmp_a64(s);
4948 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4949 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4950 do_ldrq(s, a->rd, a->pg, addr, msz);
4952 return true;
4955 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4957 if (sve_access_check(s)) {
4958 TCGv_i64 addr = new_tmp_a64(s);
4959 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4960 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4962 return true;
/* Load and broadcast element. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Wide predicate: branch over the load if no element is active. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  The address is rn + (imm << msz), MTE-checked. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
/*
 * Emit a predicated contiguous store of nreg registers (0 => ST1).
 * fn_single is indexed by [mte][be][msz][esz] (NULL where msz > esz is
 * unencodable); fn_multiple by [mte][be][nreg-1][msz] with msz == esz.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
5141 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5143 if (a->rm == 31 || a->msz > a->esz) {
5144 return false;
5146 if (sve_access_check(s)) {
5147 TCGv_i64 addr = new_tmp_a64(s);
5148 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5149 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5150 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5152 return true;
5155 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5157 if (a->msz > a->esz) {
5158 return false;
5160 if (sve_access_check(s)) {
5161 int vsz = vec_full_reg_size(s);
5162 int elements = vsz >> a->esz;
5163 TCGv_i64 addr = new_tmp_a64(s);
5165 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5166 (a->imm * elements * (a->nreg + 1)) << a->msz);
5167 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5169 return true;
5173 *** SVE gather loads / scatter stores
/*
 * Emit a gather load or scatter store: vector base zm plus scalar
 * offset, predicated by pg.  scale is folded into the simd_data field
 * of the descriptor; when MTE is active the MTE descriptor bits are
 * packed above SVE_MTEDESC_SHIFT in the same word.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc = 0;

    if (s->mte_active[0]) {
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);
    t_desc = tcg_const_i32(desc);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
/* 32-bit gather load helpers.
 * Indexed by [mte][be][ff][xs][u][msz]; NULL marks unencodable
 * combinations (signed extension at msz == esz).
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
/* 64-bit gather load helpers.
 * Indexed by [mte][be][ff][xs][u][msz]; NULL marks unencodable
 * combinations.  Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
5537 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5539 gen_helper_gvec_mem_scatter *fn = NULL;
5540 bool be = s->be_data == MO_BE;
5541 bool mte = s->mte_active[0];
5543 if (!sve_access_check(s)) {
5544 return true;
5547 switch (a->esz) {
5548 case MO_32:
5549 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
5550 break;
5551 case MO_64:
5552 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
5553 break;
5555 assert(fn != NULL);
5557 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5558 cpu_reg_sp(s, a->rn), a->msz, false, fn);
5559 return true;
/*
 * LD1 gather (vector plus immediate).  Reuses the scalar-plus-vector
 * tables with xs=0 (32-bit) or the overloaded xs=2 (64-bit offsets).
 */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];
    TCGv_i64 imm;

    /* Reject msz wider than esz, and unsigned-only msz == esz forms. */
    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
    tcg_temp_free_i64(imm);
    return true;
}
/* 32-bit scatter store helpers.
 * Indexed by [mte][be][xs][msz]; the byte helper is endian-agnostic.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
/* 64-bit scatter store helpers.
 * Indexed by [mte][be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    /* This half holds the _mte helpers; the original comment said
     * "MTE Inactive", a copy-paste slip.  */
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
/* ST1 scatter (scalar plus vector). */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    /* Reject msz wider than esz, and scaled byte accesses. */
    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}
/*
 * ST1 scatter (vector plus immediate).  Reuses the scalar-plus-vector
 * tables with xs=0 (32-bit) or the overloaded xs=2 (64-bit offsets).
 */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];
    TCGv_i64 imm;

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
    tcg_temp_free_i64(imm);
    return true;
}
5748 * Prefetches
5751 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5753 /* Prefetch is a nop within QEMU. */
5754 (void)sve_access_check(s);
5755 return true;
5758 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5760 if (a->rm == 31) {
5761 return false;
5763 /* Prefetch is a nop within QEMU. */
5764 (void)sve_access_check(s);
5765 return true;
5769 * Move Prefix
5771 * TODO: The implementation so far could handle predicated merging movprfx.
5772 * The helper functions as written take an extra source register to
5773 * use in the operation, but the result is only written when predication
5774 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5775 * to allow the final write back to the destination to be unconditional.
5776 * For predicated zeroing movprfx, we need to rearrange the helpers to
5777 * allow the final write back to zero inactives.
5779 * In the meantime, just emit the moves.
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    /* Unpredicated MOVPRFX: emitted as a plain whole-vector move. */
    return do_mov_z(s, a->rd, a->rn);
}
5787 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5789 if (sve_access_check(s)) {
5790 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5792 return true;
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    /* Predicated zeroing MOVPRFX: copy active elements, zero the rest. */
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}