scripts: kernel-doc: allow passing desired Sphinx C domain dialect
[qemu/ar7.git] / target / arm / translate-sve.c
blob0c3a6d21210404a1340e58c9a02178e7fa048d5b
1 /*
2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "tcg/tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
51 * Helpers for extracting complex instruction fields.
54 /* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
57 static int tszimm_esz(DisasContext *s, int x)
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
63 static int tszimm_shr(DisasContext *s, int x)
65 return (16 << tszimm_esz(s, x)) - x;
68 /* See e.g. LSL (immediate, predicated). */
69 static int tszimm_shl(DisasContext *s, int x)
71 return x - (8 << tszimm_esz(s, x));
74 static inline int plus1(DisasContext *s, int x)
76 return x + 1;
79 /* The SH bit is in bit 8. Extract the low 8 and shift. */
80 static inline int expand_imm_sh8s(DisasContext *s, int x)
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
85 static inline int expand_imm_sh8u(DisasContext *s, int x)
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
93 static inline int msz_dtype(DisasContext *s, int msz)
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
100 * Include the generated decoder.
103 #include "decode-sve.c.inc"
106 * Implement all of the translator functions referenced by the decoder.
109 /* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
114 return offsetof(CPUARMState, vfp.pregs[regno]);
117 /* Return the byte size of the whole predicate register, VL / 64. */
118 static inline int pred_full_reg_size(DisasContext *s)
120 return s->sve_len >> 3;
/*
 * Round up the size of a register to a size allowed by the tcg
 * vector infrastructure: 8 bytes, or else a multiple of 16 bytes.
 * Any operation which uses this size may assume that the bits above
 * pred_full_reg_size are zero, and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size > 8) {
        return QEMU_ALIGN_UP(size, 16);
    }
    return 8;
}
140 static int pred_gvec_reg_size(DisasContext *s)
142 return size_for_gvec(pred_full_reg_size(s));
145 /* Invoke an out-of-line helper on 2 Zregs. */
146 static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
147 int rd, int rn, int data)
149 unsigned vsz = vec_full_reg_size(s);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn),
152 vsz, vsz, data, fn);
155 /* Invoke an out-of-line helper on 3 Zregs. */
156 static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
157 int rd, int rn, int rm, int data)
159 unsigned vsz = vec_full_reg_size(s);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm),
163 vsz, vsz, data, fn);
166 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
167 static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
168 int rd, int rn, int pg, int data)
170 unsigned vsz = vec_full_reg_size(s);
171 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
172 vec_full_reg_offset(s, rn),
173 pred_full_reg_offset(s, pg),
174 vsz, vsz, data, fn);
177 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
178 static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
179 int rd, int rn, int rm, int pg, int data)
181 unsigned vsz = vec_full_reg_size(s);
182 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
183 vec_full_reg_offset(s, rn),
184 vec_full_reg_offset(s, rm),
185 pred_full_reg_offset(s, pg),
186 vsz, vsz, data, fn);
189 /* Invoke a vector expander on two Zregs. */
190 static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
191 int esz, int rd, int rn)
193 unsigned vsz = vec_full_reg_size(s);
194 gvec_fn(esz, vec_full_reg_offset(s, rd),
195 vec_full_reg_offset(s, rn), vsz, vsz);
198 /* Invoke a vector expander on three Zregs. */
199 static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
200 int esz, int rd, int rn, int rm)
202 unsigned vsz = vec_full_reg_size(s);
203 gvec_fn(esz, vec_full_reg_offset(s, rd),
204 vec_full_reg_offset(s, rn),
205 vec_full_reg_offset(s, rm), vsz, vsz);
208 /* Invoke a vector move on two Zregs. */
209 static bool do_mov_z(DisasContext *s, int rd, int rn)
211 if (sve_access_check(s)) {
212 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
214 return true;
217 /* Initialize a Zreg with replications of a 64-bit immediate. */
218 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
220 unsigned vsz = vec_full_reg_size(s);
221 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
224 /* Invoke a vector expander on three Pregs. */
225 static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
226 int rd, int rn, int rm)
228 unsigned psz = pred_gvec_reg_size(s);
229 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
230 pred_full_reg_offset(s, rn),
231 pred_full_reg_offset(s, rm), psz, psz);
234 /* Invoke a vector move on two Pregs. */
235 static bool do_mov_p(DisasContext *s, int rd, int rn)
237 if (sve_access_check(s)) {
238 unsigned psz = pred_gvec_reg_size(s);
239 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
240 pred_full_reg_offset(s, rn), psz, psz);
242 return true;
245 /* Set the cpu flags as per a return from an SVE helper. */
246 static void do_pred_flags(TCGv_i32 t)
248 tcg_gen_mov_i32(cpu_NF, t);
249 tcg_gen_andi_i32(cpu_ZF, t, 2);
250 tcg_gen_andi_i32(cpu_CF, t, 1);
251 tcg_gen_movi_i32(cpu_VF, 0);
254 /* Subroutines computing the ARM PredTest psuedofunction. */
255 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
257 TCGv_i32 t = tcg_temp_new_i32();
259 gen_helper_sve_predtest1(t, d, g);
260 do_pred_flags(t);
261 tcg_temp_free_i32(t);
264 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
266 TCGv_ptr dptr = tcg_temp_new_ptr();
267 TCGv_ptr gptr = tcg_temp_new_ptr();
268 TCGv_i32 t;
270 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
271 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
272 t = tcg_const_i32(words);
274 gen_helper_sve_predtest(t, dptr, gptr, t);
275 tcg_temp_free_ptr(dptr);
276 tcg_temp_free_ptr(gptr);
278 do_pred_flags(t);
279 tcg_temp_free_i32(t);
/*
 * For each element size, the bits within a predicate word that are
 * active: every bit, every 2nd, every 4th and every 8th bit.
 */
const uint64_t pred_esz_masks[4] = {
    [0] = 0xffffffffffffffffull,
    [1] = 0x5555555555555555ull,
    [2] = 0x1111111111111111ull,
    [3] = 0x0101010101010101ull,
};
289 *** SVE Logical - Unpredicated Group
292 static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
294 if (sve_access_check(s)) {
295 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
297 return true;
300 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
302 return do_zzz_fn(s, a, tcg_gen_gvec_and);
305 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
307 return do_zzz_fn(s, a, tcg_gen_gvec_or);
310 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
312 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
315 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
317 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
321 *** SVE Integer Arithmetic - Unpredicated Group
324 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
326 return do_zzz_fn(s, a, tcg_gen_gvec_add);
329 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
331 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
334 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
336 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
339 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
341 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
344 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
346 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
349 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
351 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
355 *** SVE Integer Arithmetic - Binary Predicated Group
358 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
360 if (fn == NULL) {
361 return false;
363 if (sve_access_check(s)) {
364 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
366 return true;
369 /* Select active elememnts from Zn and inactive elements from Zm,
370 * storing the result in Zd.
372 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
374 static gen_helper_gvec_4 * const fns[4] = {
375 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
376 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
378 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
381 #define DO_ZPZZ(NAME, name) \
382 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
384 static gen_helper_gvec_4 * const fns[4] = { \
385 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
386 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
387 }; \
388 return do_zpzz_ool(s, a, fns[a->esz]); \
391 DO_ZPZZ(AND, and)
392 DO_ZPZZ(EOR, eor)
393 DO_ZPZZ(ORR, orr)
394 DO_ZPZZ(BIC, bic)
396 DO_ZPZZ(ADD, add)
397 DO_ZPZZ(SUB, sub)
399 DO_ZPZZ(SMAX, smax)
400 DO_ZPZZ(UMAX, umax)
401 DO_ZPZZ(SMIN, smin)
402 DO_ZPZZ(UMIN, umin)
403 DO_ZPZZ(SABD, sabd)
404 DO_ZPZZ(UABD, uabd)
406 DO_ZPZZ(MUL, mul)
407 DO_ZPZZ(SMULH, smulh)
408 DO_ZPZZ(UMULH, umulh)
410 DO_ZPZZ(ASR, asr)
411 DO_ZPZZ(LSR, lsr)
412 DO_ZPZZ(LSL, lsl)
414 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
416 static gen_helper_gvec_4 * const fns[4] = {
417 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
419 return do_zpzz_ool(s, a, fns[a->esz]);
422 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
424 static gen_helper_gvec_4 * const fns[4] = {
425 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
427 return do_zpzz_ool(s, a, fns[a->esz]);
430 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
432 if (sve_access_check(s)) {
433 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
435 return true;
438 #undef DO_ZPZZ
441 *** SVE Integer Arithmetic - Unary Predicated Group
444 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
446 if (fn == NULL) {
447 return false;
449 if (sve_access_check(s)) {
450 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
452 return true;
455 #define DO_ZPZ(NAME, name) \
456 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
458 static gen_helper_gvec_3 * const fns[4] = { \
459 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
460 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
461 }; \
462 return do_zpz_ool(s, a, fns[a->esz]); \
465 DO_ZPZ(CLS, cls)
466 DO_ZPZ(CLZ, clz)
467 DO_ZPZ(CNT_zpz, cnt_zpz)
468 DO_ZPZ(CNOT, cnot)
469 DO_ZPZ(NOT_zpz, not_zpz)
470 DO_ZPZ(ABS, abs)
471 DO_ZPZ(NEG, neg)
473 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
475 static gen_helper_gvec_3 * const fns[4] = {
476 NULL,
477 gen_helper_sve_fabs_h,
478 gen_helper_sve_fabs_s,
479 gen_helper_sve_fabs_d
481 return do_zpz_ool(s, a, fns[a->esz]);
484 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
486 static gen_helper_gvec_3 * const fns[4] = {
487 NULL,
488 gen_helper_sve_fneg_h,
489 gen_helper_sve_fneg_s,
490 gen_helper_sve_fneg_d
492 return do_zpz_ool(s, a, fns[a->esz]);
495 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
497 static gen_helper_gvec_3 * const fns[4] = {
498 NULL,
499 gen_helper_sve_sxtb_h,
500 gen_helper_sve_sxtb_s,
501 gen_helper_sve_sxtb_d
503 return do_zpz_ool(s, a, fns[a->esz]);
506 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
508 static gen_helper_gvec_3 * const fns[4] = {
509 NULL,
510 gen_helper_sve_uxtb_h,
511 gen_helper_sve_uxtb_s,
512 gen_helper_sve_uxtb_d
514 return do_zpz_ool(s, a, fns[a->esz]);
517 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
519 static gen_helper_gvec_3 * const fns[4] = {
520 NULL, NULL,
521 gen_helper_sve_sxth_s,
522 gen_helper_sve_sxth_d
524 return do_zpz_ool(s, a, fns[a->esz]);
527 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
529 static gen_helper_gvec_3 * const fns[4] = {
530 NULL, NULL,
531 gen_helper_sve_uxth_s,
532 gen_helper_sve_uxth_d
534 return do_zpz_ool(s, a, fns[a->esz]);
537 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
542 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
544 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
547 #undef DO_ZPZ
550 *** SVE Integer Reduction Group
553 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
554 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
555 gen_helper_gvec_reduc *fn)
557 unsigned vsz = vec_full_reg_size(s);
558 TCGv_ptr t_zn, t_pg;
559 TCGv_i32 desc;
560 TCGv_i64 temp;
562 if (fn == NULL) {
563 return false;
565 if (!sve_access_check(s)) {
566 return true;
569 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
570 temp = tcg_temp_new_i64();
571 t_zn = tcg_temp_new_ptr();
572 t_pg = tcg_temp_new_ptr();
574 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
575 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
576 fn(temp, t_zn, t_pg, desc);
577 tcg_temp_free_ptr(t_zn);
578 tcg_temp_free_ptr(t_pg);
579 tcg_temp_free_i32(desc);
581 write_fp_dreg(s, a->rd, temp);
582 tcg_temp_free_i64(temp);
583 return true;
586 #define DO_VPZ(NAME, name) \
587 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
589 static gen_helper_gvec_reduc * const fns[4] = { \
590 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
591 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
592 }; \
593 return do_vpz_ool(s, a, fns[a->esz]); \
596 DO_VPZ(ORV, orv)
597 DO_VPZ(ANDV, andv)
598 DO_VPZ(EORV, eorv)
600 DO_VPZ(UADDV, uaddv)
601 DO_VPZ(SMAXV, smaxv)
602 DO_VPZ(UMAXV, umaxv)
603 DO_VPZ(SMINV, sminv)
604 DO_VPZ(UMINV, uminv)
606 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
608 static gen_helper_gvec_reduc * const fns[4] = {
609 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
610 gen_helper_sve_saddv_s, NULL
612 return do_vpz_ool(s, a, fns[a->esz]);
615 #undef DO_VPZ
618 *** SVE Shift by Immediate - Predicated Group
622 * Copy Zn into Zd, storing zeros into inactive elements.
623 * If invert, store zeros into the active elements.
625 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
626 int esz, bool invert)
628 static gen_helper_gvec_3 * const fns[4] = {
629 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
630 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
633 if (sve_access_check(s)) {
634 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
636 return true;
639 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
640 gen_helper_gvec_3 *fn)
642 if (sve_access_check(s)) {
643 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
645 return true;
648 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
650 static gen_helper_gvec_3 * const fns[4] = {
651 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
652 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
654 if (a->esz < 0) {
655 /* Invalid tsz encoding -- see tszimm_esz. */
656 return false;
658 /* Shift by element size is architecturally valid. For
659 arithmetic right-shift, it's the same as by one less. */
660 a->imm = MIN(a->imm, (8 << a->esz) - 1);
661 return do_zpzi_ool(s, a, fns[a->esz]);
664 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
666 static gen_helper_gvec_3 * const fns[4] = {
667 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
668 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
670 if (a->esz < 0) {
671 return false;
673 /* Shift by element size is architecturally valid.
674 For logical shifts, it is a zeroing operation. */
675 if (a->imm >= (8 << a->esz)) {
676 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
677 } else {
678 return do_zpzi_ool(s, a, fns[a->esz]);
682 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
684 static gen_helper_gvec_3 * const fns[4] = {
685 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
686 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
688 if (a->esz < 0) {
689 return false;
691 /* Shift by element size is architecturally valid.
692 For logical shifts, it is a zeroing operation. */
693 if (a->imm >= (8 << a->esz)) {
694 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
695 } else {
696 return do_zpzi_ool(s, a, fns[a->esz]);
700 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
702 static gen_helper_gvec_3 * const fns[4] = {
703 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
704 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
706 if (a->esz < 0) {
707 return false;
709 /* Shift by element size is architecturally valid. For arithmetic
710 right shift for division, it is a zeroing operation. */
711 if (a->imm >= (8 << a->esz)) {
712 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
713 } else {
714 return do_zpzi_ool(s, a, fns[a->esz]);
719 *** SVE Bitwise Shift - Predicated Group
722 #define DO_ZPZW(NAME, name) \
723 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
725 static gen_helper_gvec_4 * const fns[3] = { \
726 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
727 gen_helper_sve_##name##_zpzw_s, \
728 }; \
729 if (a->esz < 0 || a->esz >= 3) { \
730 return false; \
732 return do_zpzz_ool(s, a, fns[a->esz]); \
735 DO_ZPZW(ASR, asr)
736 DO_ZPZW(LSR, lsr)
737 DO_ZPZW(LSL, lsl)
739 #undef DO_ZPZW
742 *** SVE Bitwise Shift - Unpredicated Group
745 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
746 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
747 int64_t, uint32_t, uint32_t))
749 if (a->esz < 0) {
750 /* Invalid tsz encoding -- see tszimm_esz. */
751 return false;
753 if (sve_access_check(s)) {
754 unsigned vsz = vec_full_reg_size(s);
755 /* Shift by element size is architecturally valid. For
756 arithmetic right-shift, it's the same as by one less.
757 Otherwise it is a zeroing operation. */
758 if (a->imm >= 8 << a->esz) {
759 if (asr) {
760 a->imm = (8 << a->esz) - 1;
761 } else {
762 do_dupi_z(s, a->rd, 0);
763 return true;
766 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
767 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
769 return true;
772 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
774 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
777 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
779 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
782 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
784 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
787 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
789 if (fn == NULL) {
790 return false;
792 if (sve_access_check(s)) {
793 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
795 return true;
798 #define DO_ZZW(NAME, name) \
799 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
801 static gen_helper_gvec_3 * const fns[4] = { \
802 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
803 gen_helper_sve_##name##_zzw_s, NULL \
804 }; \
805 return do_zzw_ool(s, a, fns[a->esz]); \
808 DO_ZZW(ASR, asr)
809 DO_ZZW(LSR, lsr)
810 DO_ZZW(LSL, lsl)
812 #undef DO_ZZW
815 *** SVE Integer Multiply-Add Group
818 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
819 gen_helper_gvec_5 *fn)
821 if (sve_access_check(s)) {
822 unsigned vsz = vec_full_reg_size(s);
823 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
824 vec_full_reg_offset(s, a->ra),
825 vec_full_reg_offset(s, a->rn),
826 vec_full_reg_offset(s, a->rm),
827 pred_full_reg_offset(s, a->pg),
828 vsz, vsz, 0, fn);
830 return true;
833 #define DO_ZPZZZ(NAME, name) \
834 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
836 static gen_helper_gvec_5 * const fns[4] = { \
837 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
838 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
839 }; \
840 return do_zpzzz_ool(s, a, fns[a->esz]); \
843 DO_ZPZZZ(MLA, mla)
844 DO_ZPZZZ(MLS, mls)
846 #undef DO_ZPZZZ
849 *** SVE Index Generation Group
852 static void do_index(DisasContext *s, int esz, int rd,
853 TCGv_i64 start, TCGv_i64 incr)
855 unsigned vsz = vec_full_reg_size(s);
856 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
857 TCGv_ptr t_zd = tcg_temp_new_ptr();
859 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
860 if (esz == 3) {
861 gen_helper_sve_index_d(t_zd, start, incr, desc);
862 } else {
863 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
864 static index_fn * const fns[3] = {
865 gen_helper_sve_index_b,
866 gen_helper_sve_index_h,
867 gen_helper_sve_index_s,
869 TCGv_i32 s32 = tcg_temp_new_i32();
870 TCGv_i32 i32 = tcg_temp_new_i32();
872 tcg_gen_extrl_i64_i32(s32, start);
873 tcg_gen_extrl_i64_i32(i32, incr);
874 fns[esz](t_zd, s32, i32, desc);
876 tcg_temp_free_i32(s32);
877 tcg_temp_free_i32(i32);
879 tcg_temp_free_ptr(t_zd);
880 tcg_temp_free_i32(desc);
883 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
885 if (sve_access_check(s)) {
886 TCGv_i64 start = tcg_const_i64(a->imm1);
887 TCGv_i64 incr = tcg_const_i64(a->imm2);
888 do_index(s, a->esz, a->rd, start, incr);
889 tcg_temp_free_i64(start);
890 tcg_temp_free_i64(incr);
892 return true;
895 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
897 if (sve_access_check(s)) {
898 TCGv_i64 start = tcg_const_i64(a->imm);
899 TCGv_i64 incr = cpu_reg(s, a->rm);
900 do_index(s, a->esz, a->rd, start, incr);
901 tcg_temp_free_i64(start);
903 return true;
906 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
908 if (sve_access_check(s)) {
909 TCGv_i64 start = cpu_reg(s, a->rn);
910 TCGv_i64 incr = tcg_const_i64(a->imm);
911 do_index(s, a->esz, a->rd, start, incr);
912 tcg_temp_free_i64(incr);
914 return true;
917 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
919 if (sve_access_check(s)) {
920 TCGv_i64 start = cpu_reg(s, a->rn);
921 TCGv_i64 incr = cpu_reg(s, a->rm);
922 do_index(s, a->esz, a->rd, start, incr);
924 return true;
928 *** SVE Stack Allocation Group
931 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
933 if (sve_access_check(s)) {
934 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
935 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
936 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
938 return true;
941 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
943 if (sve_access_check(s)) {
944 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
945 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
946 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
948 return true;
951 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
953 if (sve_access_check(s)) {
954 TCGv_i64 reg = cpu_reg(s, a->rd);
955 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
957 return true;
961 *** SVE Compute Vector Address Group
964 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
966 if (sve_access_check(s)) {
967 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
969 return true;
972 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
974 return do_adr(s, a, gen_helper_sve_adr_p32);
977 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
979 return do_adr(s, a, gen_helper_sve_adr_p64);
982 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
984 return do_adr(s, a, gen_helper_sve_adr_s32);
987 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
989 return do_adr(s, a, gen_helper_sve_adr_u32);
993 *** SVE Integer Misc - Unpredicated Group
996 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
998 static gen_helper_gvec_2 * const fns[4] = {
999 NULL,
1000 gen_helper_sve_fexpa_h,
1001 gen_helper_sve_fexpa_s,
1002 gen_helper_sve_fexpa_d,
1004 if (a->esz == 0) {
1005 return false;
1007 if (sve_access_check(s)) {
1008 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
1010 return true;
1013 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1015 static gen_helper_gvec_3 * const fns[4] = {
1016 NULL,
1017 gen_helper_sve_ftssel_h,
1018 gen_helper_sve_ftssel_s,
1019 gen_helper_sve_ftssel_d,
1021 if (a->esz == 0) {
1022 return false;
1024 if (sve_access_check(s)) {
1025 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
1027 return true;
1031 *** SVE Predicate Logical Operations Group
1034 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1035 const GVecGen4 *gvec_op)
1037 if (!sve_access_check(s)) {
1038 return true;
1041 unsigned psz = pred_gvec_reg_size(s);
1042 int dofs = pred_full_reg_offset(s, a->rd);
1043 int nofs = pred_full_reg_offset(s, a->rn);
1044 int mofs = pred_full_reg_offset(s, a->rm);
1045 int gofs = pred_full_reg_offset(s, a->pg);
1047 if (!a->s) {
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 return true;
1052 if (psz == 8) {
1053 /* Do the operation and the flags generation in temps. */
1054 TCGv_i64 pd = tcg_temp_new_i64();
1055 TCGv_i64 pn = tcg_temp_new_i64();
1056 TCGv_i64 pm = tcg_temp_new_i64();
1057 TCGv_i64 pg = tcg_temp_new_i64();
1059 tcg_gen_ld_i64(pn, cpu_env, nofs);
1060 tcg_gen_ld_i64(pm, cpu_env, mofs);
1061 tcg_gen_ld_i64(pg, cpu_env, gofs);
1063 gvec_op->fni8(pd, pn, pm, pg);
1064 tcg_gen_st_i64(pd, cpu_env, dofs);
1066 do_predtest1(pd, pg);
1068 tcg_temp_free_i64(pd);
1069 tcg_temp_free_i64(pn);
1070 tcg_temp_free_i64(pm);
1071 tcg_temp_free_i64(pg);
1072 } else {
1073 /* The operation and flags generation is large. The computation
1074 * of the flags depends on the original contents of the guarding
1075 * predicate. If the destination overwrites the guarding predicate,
1076 * then the easiest way to get this right is to save a copy.
1078 int tofs = gofs;
1079 if (a->rd == a->pg) {
1080 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1081 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1084 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1085 do_predtest(s, dofs, tofs, psz / 8);
1087 return true;
1090 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1092 tcg_gen_and_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1096 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1099 tcg_gen_and_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1103 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1105 static const GVecGen4 op = {
1106 .fni8 = gen_and_pg_i64,
1107 .fniv = gen_and_pg_vec,
1108 .fno = gen_helper_sve_and_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1112 if (!a->s) {
1113 if (!sve_access_check(s)) {
1114 return true;
1116 if (a->rn == a->rm) {
1117 if (a->pg == a->rn) {
1118 do_mov_p(s, a->rd, a->rn);
1119 } else {
1120 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1122 return true;
1123 } else if (a->pg == a->rn || a->pg == a->rm) {
1124 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1125 return true;
1128 return do_pppp_flags(s, a, &op);
1131 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1133 tcg_gen_andc_i64(pd, pn, pm);
1134 tcg_gen_and_i64(pd, pd, pg);
1137 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1138 TCGv_vec pm, TCGv_vec pg)
1140 tcg_gen_andc_vec(vece, pd, pn, pm);
1141 tcg_gen_and_vec(vece, pd, pd, pg);
1144 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1146 static const GVecGen4 op = {
1147 .fni8 = gen_bic_pg_i64,
1148 .fniv = gen_bic_pg_vec,
1149 .fno = gen_helper_sve_bic_pppp,
1150 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1153 if (!a->s && a->pg == a->rn) {
1154 if (sve_access_check(s)) {
1155 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1157 return true;
1159 return do_pppp_flags(s, a, &op);
1162 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1164 tcg_gen_xor_i64(pd, pn, pm);
1165 tcg_gen_and_i64(pd, pd, pg);
1168 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1169 TCGv_vec pm, TCGv_vec pg)
1171 tcg_gen_xor_vec(vece, pd, pn, pm);
1172 tcg_gen_and_vec(vece, pd, pd, pg);
1175 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1177 static const GVecGen4 op = {
1178 .fni8 = gen_eor_pg_i64,
1179 .fniv = gen_eor_pg_vec,
1180 .fno = gen_helper_sve_eor_pppp,
1181 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1183 return do_pppp_flags(s, a, &op);
1186 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1188 if (a->s) {
1189 return false;
1191 if (sve_access_check(s)) {
1192 unsigned psz = pred_gvec_reg_size(s);
1193 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1194 pred_full_reg_offset(s, a->pg),
1195 pred_full_reg_offset(s, a->rn),
1196 pred_full_reg_offset(s, a->rm), psz, psz);
1198 return true;
1201 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1203 tcg_gen_or_i64(pd, pn, pm);
1204 tcg_gen_and_i64(pd, pd, pg);
1207 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1208 TCGv_vec pm, TCGv_vec pg)
1210 tcg_gen_or_vec(vece, pd, pn, pm);
1211 tcg_gen_and_vec(vece, pd, pd, pg);
1214 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1216 static const GVecGen4 op = {
1217 .fni8 = gen_orr_pg_i64,
1218 .fniv = gen_orr_pg_vec,
1219 .fno = gen_helper_sve_orr_pppp,
1220 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1223 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1224 return do_mov_p(s, a->rd, a->rn);
1226 return do_pppp_flags(s, a, &op);
1229 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1231 tcg_gen_orc_i64(pd, pn, pm);
1232 tcg_gen_and_i64(pd, pd, pg);
1235 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1236 TCGv_vec pm, TCGv_vec pg)
1238 tcg_gen_orc_vec(vece, pd, pn, pm);
1239 tcg_gen_and_vec(vece, pd, pd, pg);
1242 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1244 static const GVecGen4 op = {
1245 .fni8 = gen_orn_pg_i64,
1246 .fniv = gen_orn_pg_vec,
1247 .fno = gen_helper_sve_orn_pppp,
1248 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1250 return do_pppp_flags(s, a, &op);
1253 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1255 tcg_gen_or_i64(pd, pn, pm);
1256 tcg_gen_andc_i64(pd, pg, pd);
1259 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1260 TCGv_vec pm, TCGv_vec pg)
1262 tcg_gen_or_vec(vece, pd, pn, pm);
1263 tcg_gen_andc_vec(vece, pd, pg, pd);
1266 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1268 static const GVecGen4 op = {
1269 .fni8 = gen_nor_pg_i64,
1270 .fniv = gen_nor_pg_vec,
1271 .fno = gen_helper_sve_nor_pppp,
1272 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1274 return do_pppp_flags(s, a, &op);
1277 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1279 tcg_gen_and_i64(pd, pn, pm);
1280 tcg_gen_andc_i64(pd, pg, pd);
1283 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1284 TCGv_vec pm, TCGv_vec pg)
1286 tcg_gen_and_vec(vece, pd, pn, pm);
1287 tcg_gen_andc_vec(vece, pd, pg, pd);
1290 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1292 static const GVecGen4 op = {
1293 .fni8 = gen_nand_pg_i64,
1294 .fniv = gen_nand_pg_vec,
1295 .fno = gen_helper_sve_nand_pppp,
1296 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1298 return do_pppp_flags(s, a, &op);
1302 *** SVE Predicate Misc Group
1305 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1307 if (sve_access_check(s)) {
1308 int nofs = pred_full_reg_offset(s, a->rn);
1309 int gofs = pred_full_reg_offset(s, a->pg);
1310 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1312 if (words == 1) {
1313 TCGv_i64 pn = tcg_temp_new_i64();
1314 TCGv_i64 pg = tcg_temp_new_i64();
1316 tcg_gen_ld_i64(pn, cpu_env, nofs);
1317 tcg_gen_ld_i64(pg, cpu_env, gofs);
1318 do_predtest1(pn, pg);
1320 tcg_temp_free_i64(pn);
1321 tcg_temp_free_i64(pg);
1322 } else {
1323 do_predtest(s, nofs, gofs, words);
1326 return true;
/*
 * See the ARM pseudocode DecodePredCount.
 *
 * FULLSZ >> ESZ gives the number of elements available.  PATTERN picks
 * how many of them are selected:
 *   0x00        POW2  largest power of two <= elements
 *   0x01..0x08  VLn   exactly n, or 0 if fewer are available
 *   0x09..0x0d  VLn   16, 32, 64, 128, 256; or 0 if fewer are available
 *   0x1d        MUL4  largest multiple of 4 <= elements
 *   0x1e        MUL3  largest multiple of 3 <= elements
 *   0x1f        ALL   every element
 *   otherwise   #uimm5, selects nothing
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {                               /* POW2 */
        return pow2floor(elements);
    }
    if (pattern == 0x1d) {                              /* MUL4 */
        return elements - elements % 4;
    }
    if (pattern == 0x1e) {                              /* MUL3 */
        return elements - elements % 3;
    }
    if (pattern == 0x1f) {                              /* ALL */
        return elements;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {             /* VL1 .. VL8 */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {      /* VL16 .. VL256 */
        bound = 16 << (pattern - 9);
    } else {                                            /* #uimm5 */
        return 0;
    }

    /* A fixed count only applies when the vector is long enough. */
    return elements >= bound ? bound : 0;
}
1367 /* This handles all of the predicate initialization instructions,
1368 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1369 * so that decode_pred_count returns 0. For SETFFR, we will have
1370 * set RD == 16 == FFR.
1372 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1374 if (!sve_access_check(s)) {
1375 return true;
1378 unsigned fullsz = vec_full_reg_size(s);
1379 unsigned ofs = pred_full_reg_offset(s, rd);
1380 unsigned numelem, setsz, i;
1381 uint64_t word, lastword;
1382 TCGv_i64 t;
1384 numelem = decode_pred_count(fullsz, pat, esz);
1386 /* Determine what we must store into each bit, and how many. */
1387 if (numelem == 0) {
1388 lastword = word = 0;
1389 setsz = fullsz;
1390 } else {
1391 setsz = numelem << esz;
1392 lastword = word = pred_esz_masks[esz];
1393 if (setsz % 64) {
1394 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1398 t = tcg_temp_new_i64();
1399 if (fullsz <= 64) {
1400 tcg_gen_movi_i64(t, lastword);
1401 tcg_gen_st_i64(t, cpu_env, ofs);
1402 goto done;
1405 if (word == lastword) {
1406 unsigned maxsz = size_for_gvec(fullsz / 8);
1407 unsigned oprsz = size_for_gvec(setsz / 8);
1409 if (oprsz * 8 == setsz) {
1410 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
1411 goto done;
1415 setsz /= 8;
1416 fullsz /= 8;
1418 tcg_gen_movi_i64(t, word);
1419 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1420 tcg_gen_st_i64(t, cpu_env, ofs + i);
1422 if (lastword != word) {
1423 tcg_gen_movi_i64(t, lastword);
1424 tcg_gen_st_i64(t, cpu_env, ofs + i);
1425 i += 8;
1427 if (i < fullsz) {
1428 tcg_gen_movi_i64(t, 0);
1429 for (; i < fullsz; i += 8) {
1430 tcg_gen_st_i64(t, cpu_env, ofs + i);
1434 done:
1435 tcg_temp_free_i64(t);
1437 /* PTRUES */
1438 if (setflag) {
1439 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1440 tcg_gen_movi_i32(cpu_CF, word == 0);
1441 tcg_gen_movi_i32(cpu_VF, 0);
1442 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1444 return true;
1447 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1449 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1452 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1454 /* Note pat == 31 is #all, to set all elements. */
1455 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1458 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1460 /* Note pat == 32 is #unimp, to set no elements. */
1461 return do_predset(s, 0, a->rd, 32, false);
1464 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1466 /* The path through do_pppp_flags is complicated enough to want to avoid
1467 * duplication. Frob the arguments into the form of a predicated AND.
1469 arg_rprr_s alt_a = {
1470 .rd = a->rd, .pg = a->pg, .s = a->s,
1471 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1473 return trans_AND_pppp(s, &alt_a);
1476 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1478 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1481 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1483 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1486 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1487 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1488 TCGv_ptr, TCGv_i32))
1490 if (!sve_access_check(s)) {
1491 return true;
1494 TCGv_ptr t_pd = tcg_temp_new_ptr();
1495 TCGv_ptr t_pg = tcg_temp_new_ptr();
1496 TCGv_i32 t;
1497 unsigned desc;
1499 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1500 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1502 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1503 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1504 t = tcg_const_i32(desc);
1506 gen_fn(t, t_pd, t_pg, t);
1507 tcg_temp_free_ptr(t_pd);
1508 tcg_temp_free_ptr(t_pg);
1510 do_pred_flags(t);
1511 tcg_temp_free_i32(t);
1512 return true;
1515 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1517 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1520 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1522 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1526 *** SVE Element Count Group
1529 /* Perform an inline saturating addition of a 32-bit value within
1530 * a 64-bit register. The second operand is known to be positive,
1531 * which halves the comparisions we must perform to bound the result.
1533 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1535 int64_t ibound;
1536 TCGv_i64 bound;
1537 TCGCond cond;
1539 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1540 if (u) {
1541 tcg_gen_ext32u_i64(reg, reg);
1542 } else {
1543 tcg_gen_ext32s_i64(reg, reg);
1545 if (d) {
1546 tcg_gen_sub_i64(reg, reg, val);
1547 ibound = (u ? 0 : INT32_MIN);
1548 cond = TCG_COND_LT;
1549 } else {
1550 tcg_gen_add_i64(reg, reg, val);
1551 ibound = (u ? UINT32_MAX : INT32_MAX);
1552 cond = TCG_COND_GT;
1554 bound = tcg_const_i64(ibound);
1555 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1556 tcg_temp_free_i64(bound);
1559 /* Similarly with 64-bit values. */
1560 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1562 TCGv_i64 t0 = tcg_temp_new_i64();
1563 TCGv_i64 t1 = tcg_temp_new_i64();
1564 TCGv_i64 t2;
1566 if (u) {
1567 if (d) {
1568 tcg_gen_sub_i64(t0, reg, val);
1569 tcg_gen_movi_i64(t1, 0);
1570 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1571 } else {
1572 tcg_gen_add_i64(t0, reg, val);
1573 tcg_gen_movi_i64(t1, -1);
1574 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1576 } else {
1577 if (d) {
1578 /* Detect signed overflow for subtraction. */
1579 tcg_gen_xor_i64(t0, reg, val);
1580 tcg_gen_sub_i64(t1, reg, val);
1581 tcg_gen_xor_i64(reg, reg, t1);
1582 tcg_gen_and_i64(t0, t0, reg);
1584 /* Bound the result. */
1585 tcg_gen_movi_i64(reg, INT64_MIN);
1586 t2 = tcg_const_i64(0);
1587 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1588 } else {
1589 /* Detect signed overflow for addition. */
1590 tcg_gen_xor_i64(t0, reg, val);
1591 tcg_gen_add_i64(reg, reg, val);
1592 tcg_gen_xor_i64(t1, reg, val);
1593 tcg_gen_andc_i64(t0, t1, t0);
1595 /* Bound the result. */
1596 tcg_gen_movi_i64(t1, INT64_MAX);
1597 t2 = tcg_const_i64(0);
1598 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1600 tcg_temp_free_i64(t2);
1602 tcg_temp_free_i64(t0);
1603 tcg_temp_free_i64(t1);
1606 /* Similarly with a vector and a scalar operand. */
1607 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1608 TCGv_i64 val, bool u, bool d)
1610 unsigned vsz = vec_full_reg_size(s);
1611 TCGv_ptr dptr, nptr;
1612 TCGv_i32 t32, desc;
1613 TCGv_i64 t64;
1615 dptr = tcg_temp_new_ptr();
1616 nptr = tcg_temp_new_ptr();
1617 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1618 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1619 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1621 switch (esz) {
1622 case MO_8:
1623 t32 = tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32, val);
1625 if (d) {
1626 tcg_gen_neg_i32(t32, t32);
1628 if (u) {
1629 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1630 } else {
1631 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1633 tcg_temp_free_i32(t32);
1634 break;
1636 case MO_16:
1637 t32 = tcg_temp_new_i32();
1638 tcg_gen_extrl_i64_i32(t32, val);
1639 if (d) {
1640 tcg_gen_neg_i32(t32, t32);
1642 if (u) {
1643 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1644 } else {
1645 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1647 tcg_temp_free_i32(t32);
1648 break;
1650 case MO_32:
1651 t64 = tcg_temp_new_i64();
1652 if (d) {
1653 tcg_gen_neg_i64(t64, val);
1654 } else {
1655 tcg_gen_mov_i64(t64, val);
1657 if (u) {
1658 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1659 } else {
1660 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1662 tcg_temp_free_i64(t64);
1663 break;
1665 case MO_64:
1666 if (u) {
1667 if (d) {
1668 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1669 } else {
1670 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1672 } else if (d) {
1673 t64 = tcg_temp_new_i64();
1674 tcg_gen_neg_i64(t64, val);
1675 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1676 tcg_temp_free_i64(t64);
1677 } else {
1678 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1680 break;
1682 default:
1683 g_assert_not_reached();
1686 tcg_temp_free_ptr(dptr);
1687 tcg_temp_free_ptr(nptr);
1688 tcg_temp_free_i32(desc);
1691 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1693 if (sve_access_check(s)) {
1694 unsigned fullsz = vec_full_reg_size(s);
1695 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1696 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1698 return true;
1701 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1703 if (sve_access_check(s)) {
1704 unsigned fullsz = vec_full_reg_size(s);
1705 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1706 int inc = numelem * a->imm * (a->d ? -1 : 1);
1707 TCGv_i64 reg = cpu_reg(s, a->rd);
1709 tcg_gen_addi_i64(reg, reg, inc);
1711 return true;
1714 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1716 if (!sve_access_check(s)) {
1717 return true;
1720 unsigned fullsz = vec_full_reg_size(s);
1721 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1722 int inc = numelem * a->imm;
1723 TCGv_i64 reg = cpu_reg(s, a->rd);
1725 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1726 if (inc == 0) {
1727 if (a->u) {
1728 tcg_gen_ext32u_i64(reg, reg);
1729 } else {
1730 tcg_gen_ext32s_i64(reg, reg);
1732 } else {
1733 TCGv_i64 t = tcg_const_i64(inc);
1734 do_sat_addsub_32(reg, t, a->u, a->d);
1735 tcg_temp_free_i64(t);
1737 return true;
1740 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1742 if (!sve_access_check(s)) {
1743 return true;
1746 unsigned fullsz = vec_full_reg_size(s);
1747 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1748 int inc = numelem * a->imm;
1749 TCGv_i64 reg = cpu_reg(s, a->rd);
1751 if (inc != 0) {
1752 TCGv_i64 t = tcg_const_i64(inc);
1753 do_sat_addsub_64(reg, t, a->u, a->d);
1754 tcg_temp_free_i64(t);
1756 return true;
1759 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1761 if (a->esz == 0) {
1762 return false;
1765 unsigned fullsz = vec_full_reg_size(s);
1766 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1767 int inc = numelem * a->imm;
1769 if (inc != 0) {
1770 if (sve_access_check(s)) {
1771 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1772 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1773 vec_full_reg_offset(s, a->rn),
1774 t, fullsz, fullsz);
1775 tcg_temp_free_i64(t);
1777 } else {
1778 do_mov_z(s, a->rd, a->rn);
1780 return true;
1783 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1785 if (a->esz == 0) {
1786 return false;
1789 unsigned fullsz = vec_full_reg_size(s);
1790 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1791 int inc = numelem * a->imm;
1793 if (inc != 0) {
1794 if (sve_access_check(s)) {
1795 TCGv_i64 t = tcg_const_i64(inc);
1796 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1797 tcg_temp_free_i64(t);
1799 } else {
1800 do_mov_z(s, a->rd, a->rn);
1802 return true;
1806 *** SVE Bitwise Immediate Group
1809 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1811 uint64_t imm;
1812 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1813 extract32(a->dbm, 0, 6),
1814 extract32(a->dbm, 6, 6))) {
1815 return false;
1817 if (sve_access_check(s)) {
1818 unsigned vsz = vec_full_reg_size(s);
1819 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1820 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1822 return true;
1825 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1827 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1830 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1832 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1835 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1837 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1840 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1842 uint64_t imm;
1843 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1844 extract32(a->dbm, 0, 6),
1845 extract32(a->dbm, 6, 6))) {
1846 return false;
1848 if (sve_access_check(s)) {
1849 do_dupi_z(s, a->rd, imm);
1851 return true;
1855 *** SVE Integer Wide Immediate - Predicated Group
1858 /* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1861 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1862 TCGv_i64 val)
1864 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1865 static gen_cpy * const fns[4] = {
1866 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1867 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1869 unsigned vsz = vec_full_reg_size(s);
1870 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1871 TCGv_ptr t_zd = tcg_temp_new_ptr();
1872 TCGv_ptr t_zn = tcg_temp_new_ptr();
1873 TCGv_ptr t_pg = tcg_temp_new_ptr();
1875 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1876 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1879 fns[esz](t_zd, t_zn, t_pg, val, desc);
1881 tcg_temp_free_ptr(t_zd);
1882 tcg_temp_free_ptr(t_zn);
1883 tcg_temp_free_ptr(t_pg);
1884 tcg_temp_free_i32(desc);
1887 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1889 if (a->esz == 0) {
1890 return false;
1892 if (sve_access_check(s)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1895 TCGv_i64 t_imm = tcg_const_i64(imm);
1896 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1897 tcg_temp_free_i64(t_imm);
1899 return true;
1902 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1904 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1905 return false;
1907 if (sve_access_check(s)) {
1908 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1909 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1910 tcg_temp_free_i64(t_imm);
1912 return true;
1915 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1917 static gen_helper_gvec_2i * const fns[4] = {
1918 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1919 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1922 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1923 return false;
1925 if (sve_access_check(s)) {
1926 unsigned vsz = vec_full_reg_size(s);
1927 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1929 pred_full_reg_offset(s, a->pg),
1930 t_imm, vsz, vsz, 0, fns[a->esz]);
1931 tcg_temp_free_i64(t_imm);
1933 return true;
1937 *** SVE Permute Extract Group
1940 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1942 if (!sve_access_check(s)) {
1943 return true;
1946 unsigned vsz = vec_full_reg_size(s);
1947 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1948 unsigned n_siz = vsz - n_ofs;
1949 unsigned d = vec_full_reg_offset(s, a->rd);
1950 unsigned n = vec_full_reg_offset(s, a->rn);
1951 unsigned m = vec_full_reg_offset(s, a->rm);
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1956 if (m != d
1957 && n_ofs == size_for_gvec(n_ofs)
1958 && n_siz == size_for_gvec(n_siz)
1959 && (d != n || n_siz <= n_ofs)) {
1960 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1961 if (n_ofs != 0) {
1962 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1964 } else {
1965 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1967 return true;
1971 *** SVE Permute - Unpredicated Group
1974 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
1976 if (sve_access_check(s)) {
1977 unsigned vsz = vec_full_reg_size(s);
1978 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1979 vsz, vsz, cpu_reg_sp(s, a->rn));
1981 return true;
1984 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
1986 if ((a->imm & 0x1f) == 0) {
1987 return false;
1989 if (sve_access_check(s)) {
1990 unsigned vsz = vec_full_reg_size(s);
1991 unsigned dofs = vec_full_reg_offset(s, a->rd);
1992 unsigned esz, index;
1994 esz = ctz32(a->imm);
1995 index = a->imm >> (esz + 1);
1997 if ((index << esz) < vsz) {
1998 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1999 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2000 } else {
2002 * While dup_mem handles 128-bit elements, dup_imm does not.
2003 * Thankfully element size doesn't matter for splatting zero.
2005 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2008 return true;
2011 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2013 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2014 static gen_insr * const fns[4] = {
2015 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2016 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2018 unsigned vsz = vec_full_reg_size(s);
2019 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2020 TCGv_ptr t_zd = tcg_temp_new_ptr();
2021 TCGv_ptr t_zn = tcg_temp_new_ptr();
2023 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2024 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2026 fns[a->esz](t_zd, t_zn, val, desc);
2028 tcg_temp_free_ptr(t_zd);
2029 tcg_temp_free_ptr(t_zn);
2030 tcg_temp_free_i32(desc);
2033 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2035 if (sve_access_check(s)) {
2036 TCGv_i64 t = tcg_temp_new_i64();
2037 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2038 do_insr_i64(s, a, t);
2039 tcg_temp_free_i64(t);
2041 return true;
2044 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2046 if (sve_access_check(s)) {
2047 do_insr_i64(s, a, cpu_reg(s, a->rm));
2049 return true;
2052 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2054 static gen_helper_gvec_2 * const fns[4] = {
2055 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2056 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2059 if (sve_access_check(s)) {
2060 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
2062 return true;
2065 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2067 static gen_helper_gvec_3 * const fns[4] = {
2068 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2069 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2072 if (sve_access_check(s)) {
2073 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
2075 return true;
2078 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2080 static gen_helper_gvec_2 * const fns[4][2] = {
2081 { NULL, NULL },
2082 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2083 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2084 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2087 if (a->esz == 0) {
2088 return false;
2090 if (sve_access_check(s)) {
2091 unsigned vsz = vec_full_reg_size(s);
2092 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2093 vec_full_reg_offset(s, a->rn)
2094 + (a->h ? vsz / 2 : 0),
2095 vsz, vsz, 0, fns[a->esz][a->u]);
2097 return true;
2101 *** SVE Permute - Predicates Group
2104 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2105 gen_helper_gvec_3 *fn)
2107 if (!sve_access_check(s)) {
2108 return true;
2111 unsigned vsz = pred_full_reg_size(s);
2113 /* Predicate sizes may be smaller and cannot use simd_desc.
2114 We cannot round up, as we do elsewhere, because we need
2115 the exact size for ZIP2 and REV. We retain the style for
2116 the other helpers for consistency. */
2117 TCGv_ptr t_d = tcg_temp_new_ptr();
2118 TCGv_ptr t_n = tcg_temp_new_ptr();
2119 TCGv_ptr t_m = tcg_temp_new_ptr();
2120 TCGv_i32 t_desc;
2121 int desc;
2123 desc = vsz - 2;
2124 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2125 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2127 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2128 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2129 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2130 t_desc = tcg_const_i32(desc);
2132 fn(t_d, t_n, t_m, t_desc);
2134 tcg_temp_free_ptr(t_d);
2135 tcg_temp_free_ptr(t_n);
2136 tcg_temp_free_ptr(t_m);
2137 tcg_temp_free_i32(t_desc);
2138 return true;
2141 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2142 gen_helper_gvec_2 *fn)
2144 if (!sve_access_check(s)) {
2145 return true;
2148 unsigned vsz = pred_full_reg_size(s);
2149 TCGv_ptr t_d = tcg_temp_new_ptr();
2150 TCGv_ptr t_n = tcg_temp_new_ptr();
2151 TCGv_i32 t_desc;
2152 int desc;
2154 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2155 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2157 /* Predicate sizes may be smaller and cannot use simd_desc.
2158 We cannot round up, as we do elsewhere, because we need
2159 the exact size for ZIP2 and REV. We retain the style for
2160 the other helpers for consistency. */
2162 desc = vsz - 2;
2163 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2164 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2165 t_desc = tcg_const_i32(desc);
2167 fn(t_d, t_n, t_desc);
2169 tcg_temp_free_i32(t_desc);
2170 tcg_temp_free_ptr(t_d);
2171 tcg_temp_free_ptr(t_n);
2172 return true;
2175 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2177 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2180 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2182 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2185 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2187 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2190 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2192 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2195 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2197 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2200 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2202 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2205 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2207 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2210 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2212 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2215 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2217 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2221 *** SVE Permute - Interleaving Group
2224 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2226 static gen_helper_gvec_3 * const fns[4] = {
2227 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2228 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2231 if (sve_access_check(s)) {
2232 unsigned vsz = vec_full_reg_size(s);
2233 unsigned high_ofs = high ? vsz / 2 : 0;
2234 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2235 vec_full_reg_offset(s, a->rn) + high_ofs,
2236 vec_full_reg_offset(s, a->rm) + high_ofs,
2237 vsz, vsz, 0, fns[a->esz]);
2239 return true;
2242 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2243 gen_helper_gvec_3 *fn)
2245 if (sve_access_check(s)) {
2246 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
2248 return true;
2251 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2253 return do_zip(s, a, false);
2256 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2258 return do_zip(s, a, true);
2261 static gen_helper_gvec_3 * const uzp_fns[4] = {
2262 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2263 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2266 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2268 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2271 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2273 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2276 static gen_helper_gvec_3 * const trn_fns[4] = {
2277 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2278 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2281 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2283 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2286 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2288 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2292 *** SVE Permute Vector - Predicated Group
2295 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2297 static gen_helper_gvec_3 * const fns[4] = {
2298 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2300 return do_zpz_ool(s, a, fns[a->esz]);
2303 /* Call the helper that computes the ARM LastActiveElement pseudocode
2304 * function, scaled by the element size. This includes the not found
2305 * indication; e.g. not found for esz=3 is -8.
2307 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2309 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2310 * round up, as we do elsewhere, because we need the exact size.
2312 TCGv_ptr t_p = tcg_temp_new_ptr();
2313 TCGv_i32 t_desc;
2314 unsigned vsz = pred_full_reg_size(s);
2315 unsigned desc;
2317 desc = vsz - 2;
2318 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2320 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2321 t_desc = tcg_const_i32(desc);
2323 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2325 tcg_temp_free_i32(t_desc);
2326 tcg_temp_free_ptr(t_p);
2329 /* Increment LAST to the offset of the next element in the vector,
2330 * wrapping around to 0.
2332 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2334 unsigned vsz = vec_full_reg_size(s);
2336 tcg_gen_addi_i32(last, last, 1 << esz);
2337 if (is_power_of_2(vsz)) {
2338 tcg_gen_andi_i32(last, last, vsz - 1);
2339 } else {
2340 TCGv_i32 max = tcg_const_i32(vsz);
2341 TCGv_i32 zero = tcg_const_i32(0);
2342 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2343 tcg_temp_free_i32(max);
2344 tcg_temp_free_i32(zero);
2348 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2349 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2351 unsigned vsz = vec_full_reg_size(s);
2353 if (is_power_of_2(vsz)) {
2354 tcg_gen_andi_i32(last, last, vsz - 1);
2355 } else {
2356 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2357 TCGv_i32 zero = tcg_const_i32(0);
2358 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2359 tcg_temp_free_i32(max);
2360 tcg_temp_free_i32(zero);
2364 /* Load an unsigned element of ESZ from BASE+OFS. */
2365 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2367 TCGv_i64 r = tcg_temp_new_i64();
2369 switch (esz) {
2370 case 0:
2371 tcg_gen_ld8u_i64(r, base, ofs);
2372 break;
2373 case 1:
2374 tcg_gen_ld16u_i64(r, base, ofs);
2375 break;
2376 case 2:
2377 tcg_gen_ld32u_i64(r, base, ofs);
2378 break;
2379 case 3:
2380 tcg_gen_ld_i64(r, base, ofs);
2381 break;
2382 default:
2383 g_assert_not_reached();
2385 return r;
2388 /* Load an unsigned element of ESZ from RM[LAST]. */
2389 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2390 int rm, int esz)
2392 TCGv_ptr p = tcg_temp_new_ptr();
2393 TCGv_i64 r;
2395 /* Convert offset into vector into offset into ENV.
2396 * The final adjustment for the vector register base
2397 * is added via constant offset to the load.
2399 #ifdef HOST_WORDS_BIGENDIAN
2400 /* Adjust for element ordering. See vec_reg_offset. */
2401 if (esz < 3) {
2402 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2404 #endif
2405 tcg_gen_ext_i32_ptr(p, last);
2406 tcg_gen_add_ptr(p, p, cpu_env);
2408 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2409 tcg_temp_free_ptr(p);
2411 return r;
2414 /* Compute CLAST for a Zreg. */
2415 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2417 TCGv_i32 last;
2418 TCGLabel *over;
2419 TCGv_i64 ele;
2420 unsigned vsz, esz = a->esz;
2422 if (!sve_access_check(s)) {
2423 return true;
2426 last = tcg_temp_local_new_i32();
2427 over = gen_new_label();
2429 find_last_active(s, last, esz, a->pg);
2431 /* There is of course no movcond for a 2048-bit vector,
2432 * so we must branch over the actual store.
2434 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2436 if (!before) {
2437 incr_last_active(s, last, esz);
2440 ele = load_last_active(s, last, a->rm, esz);
2441 tcg_temp_free_i32(last);
2443 vsz = vec_full_reg_size(s);
2444 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2445 tcg_temp_free_i64(ele);
2447 /* If this insn used MOVPRFX, we may need a second move. */
2448 if (a->rd != a->rn) {
2449 TCGLabel *done = gen_new_label();
2450 tcg_gen_br(done);
2452 gen_set_label(over);
2453 do_mov_z(s, a->rd, a->rn);
2455 gen_set_label(done);
2456 } else {
2457 gen_set_label(over);
2459 return true;
2462 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2464 return do_clast_vector(s, a, false);
2467 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2469 return do_clast_vector(s, a, true);
2472 /* Compute CLAST for a scalar. */
2473 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2474 bool before, TCGv_i64 reg_val)
2476 TCGv_i32 last = tcg_temp_new_i32();
2477 TCGv_i64 ele, cmp, zero;
2479 find_last_active(s, last, esz, pg);
2481 /* Extend the original value of last prior to incrementing. */
2482 cmp = tcg_temp_new_i64();
2483 tcg_gen_ext_i32_i64(cmp, last);
2485 if (!before) {
2486 incr_last_active(s, last, esz);
2489 /* The conceit here is that while last < 0 indicates not found, after
2490 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2491 * from which we can load garbage. We then discard the garbage with
2492 * a conditional move.
2494 ele = load_last_active(s, last, rm, esz);
2495 tcg_temp_free_i32(last);
2497 zero = tcg_const_i64(0);
2498 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2500 tcg_temp_free_i64(zero);
2501 tcg_temp_free_i64(cmp);
2502 tcg_temp_free_i64(ele);
2505 /* Compute CLAST for a Vreg. */
2506 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2508 if (sve_access_check(s)) {
2509 int esz = a->esz;
2510 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2511 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2513 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2514 write_fp_dreg(s, a->rd, reg);
2515 tcg_temp_free_i64(reg);
2517 return true;
2520 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2522 return do_clast_fp(s, a, false);
2525 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2527 return do_clast_fp(s, a, true);
2530 /* Compute CLAST for a Xreg. */
2531 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2533 TCGv_i64 reg;
2535 if (!sve_access_check(s)) {
2536 return true;
2539 reg = cpu_reg(s, a->rd);
2540 switch (a->esz) {
2541 case 0:
2542 tcg_gen_ext8u_i64(reg, reg);
2543 break;
2544 case 1:
2545 tcg_gen_ext16u_i64(reg, reg);
2546 break;
2547 case 2:
2548 tcg_gen_ext32u_i64(reg, reg);
2549 break;
2550 case 3:
2551 break;
2552 default:
2553 g_assert_not_reached();
2556 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2557 return true;
2560 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2562 return do_clast_general(s, a, false);
2565 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2567 return do_clast_general(s, a, true);
2570 /* Compute LAST for a scalar. */
2571 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2572 int pg, int rm, bool before)
2574 TCGv_i32 last = tcg_temp_new_i32();
2575 TCGv_i64 ret;
2577 find_last_active(s, last, esz, pg);
2578 if (before) {
2579 wrap_last_active(s, last, esz);
2580 } else {
2581 incr_last_active(s, last, esz);
2584 ret = load_last_active(s, last, rm, esz);
2585 tcg_temp_free_i32(last);
2586 return ret;
2589 /* Compute LAST for a Vreg. */
2590 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2592 if (sve_access_check(s)) {
2593 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2594 write_fp_dreg(s, a->rd, val);
2595 tcg_temp_free_i64(val);
2597 return true;
2600 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2602 return do_last_fp(s, a, false);
2605 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2607 return do_last_fp(s, a, true);
2610 /* Compute LAST for a Xreg. */
2611 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2613 if (sve_access_check(s)) {
2614 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2615 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2616 tcg_temp_free_i64(val);
2618 return true;
2621 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2623 return do_last_general(s, a, false);
2626 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2628 return do_last_general(s, a, true);
2631 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2633 if (sve_access_check(s)) {
2634 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2636 return true;
2639 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2641 if (sve_access_check(s)) {
2642 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2643 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2644 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2645 tcg_temp_free_i64(t);
2647 return true;
2650 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2652 static gen_helper_gvec_3 * const fns[4] = {
2653 NULL,
2654 gen_helper_sve_revb_h,
2655 gen_helper_sve_revb_s,
2656 gen_helper_sve_revb_d,
2658 return do_zpz_ool(s, a, fns[a->esz]);
2661 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2663 static gen_helper_gvec_3 * const fns[4] = {
2664 NULL,
2665 NULL,
2666 gen_helper_sve_revh_s,
2667 gen_helper_sve_revh_d,
2669 return do_zpz_ool(s, a, fns[a->esz]);
2672 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2674 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2677 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2679 static gen_helper_gvec_3 * const fns[4] = {
2680 gen_helper_sve_rbit_b,
2681 gen_helper_sve_rbit_h,
2682 gen_helper_sve_rbit_s,
2683 gen_helper_sve_rbit_d,
2685 return do_zpz_ool(s, a, fns[a->esz]);
2688 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2690 if (sve_access_check(s)) {
2691 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
2692 a->rd, a->rn, a->rm, a->pg, a->esz);
2694 return true;
/*
 *** SVE Integer Compare - Vectors Group
 */
2701 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2702 gen_helper_gvec_flags_4 *gen_fn)
2704 TCGv_ptr pd, zn, zm, pg;
2705 unsigned vsz;
2706 TCGv_i32 t;
2708 if (gen_fn == NULL) {
2709 return false;
2711 if (!sve_access_check(s)) {
2712 return true;
2715 vsz = vec_full_reg_size(s);
2716 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2717 pd = tcg_temp_new_ptr();
2718 zn = tcg_temp_new_ptr();
2719 zm = tcg_temp_new_ptr();
2720 pg = tcg_temp_new_ptr();
2722 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2723 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2724 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2725 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2727 gen_fn(t, pd, zn, zm, pg, t);
2729 tcg_temp_free_ptr(pd);
2730 tcg_temp_free_ptr(zn);
2731 tcg_temp_free_ptr(zm);
2732 tcg_temp_free_ptr(pg);
2734 do_pred_flags(t);
2736 tcg_temp_free_i32(t);
2737 return true;
2740 #define DO_PPZZ(NAME, name) \
2741 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
2743 static gen_helper_gvec_flags_4 * const fns[4] = { \
2744 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2745 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2746 }; \
2747 return do_ppzz_flags(s, a, fns[a->esz]); \
2750 DO_PPZZ(CMPEQ, cmpeq)
2751 DO_PPZZ(CMPNE, cmpne)
2752 DO_PPZZ(CMPGT, cmpgt)
2753 DO_PPZZ(CMPGE, cmpge)
2754 DO_PPZZ(CMPHI, cmphi)
2755 DO_PPZZ(CMPHS, cmphs)
2757 #undef DO_PPZZ
2759 #define DO_PPZW(NAME, name) \
2760 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
2762 static gen_helper_gvec_flags_4 * const fns[4] = { \
2763 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2764 gen_helper_sve_##name##_ppzw_s, NULL \
2765 }; \
2766 return do_ppzz_flags(s, a, fns[a->esz]); \
2769 DO_PPZW(CMPEQ, cmpeq)
2770 DO_PPZW(CMPNE, cmpne)
2771 DO_PPZW(CMPGT, cmpgt)
2772 DO_PPZW(CMPGE, cmpge)
2773 DO_PPZW(CMPHI, cmphi)
2774 DO_PPZW(CMPHS, cmphs)
2775 DO_PPZW(CMPLT, cmplt)
2776 DO_PPZW(CMPLE, cmple)
2777 DO_PPZW(CMPLO, cmplo)
2778 DO_PPZW(CMPLS, cmpls)
2780 #undef DO_PPZW
/*
 *** SVE Integer Compare - Immediate Groups
 */
2786 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2787 gen_helper_gvec_flags_3 *gen_fn)
2789 TCGv_ptr pd, zn, pg;
2790 unsigned vsz;
2791 TCGv_i32 t;
2793 if (gen_fn == NULL) {
2794 return false;
2796 if (!sve_access_check(s)) {
2797 return true;
2800 vsz = vec_full_reg_size(s);
2801 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2802 pd = tcg_temp_new_ptr();
2803 zn = tcg_temp_new_ptr();
2804 pg = tcg_temp_new_ptr();
2806 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2807 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2808 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2810 gen_fn(t, pd, zn, pg, t);
2812 tcg_temp_free_ptr(pd);
2813 tcg_temp_free_ptr(zn);
2814 tcg_temp_free_ptr(pg);
2816 do_pred_flags(t);
2818 tcg_temp_free_i32(t);
2819 return true;
2822 #define DO_PPZI(NAME, name) \
2823 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
2825 static gen_helper_gvec_flags_3 * const fns[4] = { \
2826 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2827 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2828 }; \
2829 return do_ppzi_flags(s, a, fns[a->esz]); \
2832 DO_PPZI(CMPEQ, cmpeq)
2833 DO_PPZI(CMPNE, cmpne)
2834 DO_PPZI(CMPGT, cmpgt)
2835 DO_PPZI(CMPGE, cmpge)
2836 DO_PPZI(CMPHI, cmphi)
2837 DO_PPZI(CMPHS, cmphs)
2838 DO_PPZI(CMPLT, cmplt)
2839 DO_PPZI(CMPLE, cmple)
2840 DO_PPZI(CMPLO, cmplo)
2841 DO_PPZI(CMPLS, cmpls)
2843 #undef DO_PPZI
/*
 *** SVE Partition Break Group
 */
2849 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2850 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2852 if (!sve_access_check(s)) {
2853 return true;
2856 unsigned vsz = pred_full_reg_size(s);
2858 /* Predicate sizes may be smaller and cannot use simd_desc. */
2859 TCGv_ptr d = tcg_temp_new_ptr();
2860 TCGv_ptr n = tcg_temp_new_ptr();
2861 TCGv_ptr m = tcg_temp_new_ptr();
2862 TCGv_ptr g = tcg_temp_new_ptr();
2863 TCGv_i32 t = tcg_const_i32(vsz - 2);
2865 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2866 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2867 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2868 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2870 if (a->s) {
2871 fn_s(t, d, n, m, g, t);
2872 do_pred_flags(t);
2873 } else {
2874 fn(d, n, m, g, t);
2876 tcg_temp_free_ptr(d);
2877 tcg_temp_free_ptr(n);
2878 tcg_temp_free_ptr(m);
2879 tcg_temp_free_ptr(g);
2880 tcg_temp_free_i32(t);
2881 return true;
2884 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2885 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2887 if (!sve_access_check(s)) {
2888 return true;
2891 unsigned vsz = pred_full_reg_size(s);
2893 /* Predicate sizes may be smaller and cannot use simd_desc. */
2894 TCGv_ptr d = tcg_temp_new_ptr();
2895 TCGv_ptr n = tcg_temp_new_ptr();
2896 TCGv_ptr g = tcg_temp_new_ptr();
2897 TCGv_i32 t = tcg_const_i32(vsz - 2);
2899 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2900 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2901 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2903 if (a->s) {
2904 fn_s(t, d, n, g, t);
2905 do_pred_flags(t);
2906 } else {
2907 fn(d, n, g, t);
2909 tcg_temp_free_ptr(d);
2910 tcg_temp_free_ptr(n);
2911 tcg_temp_free_ptr(g);
2912 tcg_temp_free_i32(t);
2913 return true;
2916 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2918 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2921 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2923 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2926 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2928 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2931 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2933 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2936 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2938 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2941 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2943 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2946 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
2948 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
/*
 *** SVE Predicate Count Group
 */
2955 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2957 unsigned psz = pred_full_reg_size(s);
2959 if (psz <= 8) {
2960 uint64_t psz_mask;
2962 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2963 if (pn != pg) {
2964 TCGv_i64 g = tcg_temp_new_i64();
2965 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2966 tcg_gen_and_i64(val, val, g);
2967 tcg_temp_free_i64(g);
2970 /* Reduce the pred_esz_masks value simply to reduce the
2971 * size of the code generated here.
2973 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2974 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2976 tcg_gen_ctpop_i64(val, val);
2977 } else {
2978 TCGv_ptr t_pn = tcg_temp_new_ptr();
2979 TCGv_ptr t_pg = tcg_temp_new_ptr();
2980 unsigned desc;
2981 TCGv_i32 t_desc;
2983 desc = psz - 2;
2984 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2986 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
2987 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2988 t_desc = tcg_const_i32(desc);
2990 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
2991 tcg_temp_free_ptr(t_pn);
2992 tcg_temp_free_ptr(t_pg);
2993 tcg_temp_free_i32(t_desc);
2997 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
2999 if (sve_access_check(s)) {
3000 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3002 return true;
3005 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3007 if (sve_access_check(s)) {
3008 TCGv_i64 reg = cpu_reg(s, a->rd);
3009 TCGv_i64 val = tcg_temp_new_i64();
3011 do_cntp(s, val, a->esz, a->pg, a->pg);
3012 if (a->d) {
3013 tcg_gen_sub_i64(reg, reg, val);
3014 } else {
3015 tcg_gen_add_i64(reg, reg, val);
3017 tcg_temp_free_i64(val);
3019 return true;
3022 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3024 if (a->esz == 0) {
3025 return false;
3027 if (sve_access_check(s)) {
3028 unsigned vsz = vec_full_reg_size(s);
3029 TCGv_i64 val = tcg_temp_new_i64();
3030 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3032 do_cntp(s, val, a->esz, a->pg, a->pg);
3033 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3034 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3036 return true;
3039 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3041 if (sve_access_check(s)) {
3042 TCGv_i64 reg = cpu_reg(s, a->rd);
3043 TCGv_i64 val = tcg_temp_new_i64();
3045 do_cntp(s, val, a->esz, a->pg, a->pg);
3046 do_sat_addsub_32(reg, val, a->u, a->d);
3048 return true;
3051 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3053 if (sve_access_check(s)) {
3054 TCGv_i64 reg = cpu_reg(s, a->rd);
3055 TCGv_i64 val = tcg_temp_new_i64();
3057 do_cntp(s, val, a->esz, a->pg, a->pg);
3058 do_sat_addsub_64(reg, val, a->u, a->d);
3060 return true;
3063 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3065 if (a->esz == 0) {
3066 return false;
3068 if (sve_access_check(s)) {
3069 TCGv_i64 val = tcg_temp_new_i64();
3070 do_cntp(s, val, a->esz, a->pg, a->pg);
3071 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3073 return true;
/*
 *** SVE Integer Compare Scalars Group
 */
3080 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3082 if (!sve_access_check(s)) {
3083 return true;
3086 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3087 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3088 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3089 TCGv_i64 cmp = tcg_temp_new_i64();
3091 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3092 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3093 tcg_temp_free_i64(cmp);
3095 /* VF = !NF & !CF. */
3096 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3097 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3099 /* Both NF and VF actually look at bit 31. */
3100 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3101 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3102 return true;
3105 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3107 TCGv_i64 op0, op1, t0, t1, tmax;
3108 TCGv_i32 t2, t3;
3109 TCGv_ptr ptr;
3110 unsigned desc, vsz = vec_full_reg_size(s);
3111 TCGCond cond;
3113 if (!sve_access_check(s)) {
3114 return true;
3117 op0 = read_cpu_reg(s, a->rn, 1);
3118 op1 = read_cpu_reg(s, a->rm, 1);
3120 if (!a->sf) {
3121 if (a->u) {
3122 tcg_gen_ext32u_i64(op0, op0);
3123 tcg_gen_ext32u_i64(op1, op1);
3124 } else {
3125 tcg_gen_ext32s_i64(op0, op0);
3126 tcg_gen_ext32s_i64(op1, op1);
3130 /* For the helper, compress the different conditions into a computation
3131 * of how many iterations for which the condition is true.
3133 t0 = tcg_temp_new_i64();
3134 t1 = tcg_temp_new_i64();
3135 tcg_gen_sub_i64(t0, op1, op0);
3137 tmax = tcg_const_i64(vsz >> a->esz);
3138 if (a->eq) {
3139 /* Equality means one more iteration. */
3140 tcg_gen_addi_i64(t0, t0, 1);
3142 /* If op1 is max (un)signed integer (and the only time the addition
3143 * above could overflow), then we produce an all-true predicate by
3144 * setting the count to the vector length. This is because the
3145 * pseudocode is described as an increment + compare loop, and the
3146 * max integer would always compare true.
3148 tcg_gen_movi_i64(t1, (a->sf
3149 ? (a->u ? UINT64_MAX : INT64_MAX)
3150 : (a->u ? UINT32_MAX : INT32_MAX)));
3151 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3154 /* Bound to the maximum. */
3155 tcg_gen_umin_i64(t0, t0, tmax);
3156 tcg_temp_free_i64(tmax);
3158 /* Set the count to zero if the condition is false. */
3159 cond = (a->u
3160 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3161 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3162 tcg_gen_movi_i64(t1, 0);
3163 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3164 tcg_temp_free_i64(t1);
3166 /* Since we're bounded, pass as a 32-bit type. */
3167 t2 = tcg_temp_new_i32();
3168 tcg_gen_extrl_i64_i32(t2, t0);
3169 tcg_temp_free_i64(t0);
3171 /* Scale elements to bits. */
3172 tcg_gen_shli_i32(t2, t2, a->esz);
3174 desc = (vsz / 8) - 2;
3175 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3176 t3 = tcg_const_i32(desc);
3178 ptr = tcg_temp_new_ptr();
3179 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3181 gen_helper_sve_while(t2, ptr, t2, t3);
3182 do_pred_flags(t2);
3184 tcg_temp_free_ptr(ptr);
3185 tcg_temp_free_i32(t2);
3186 tcg_temp_free_i32(t3);
3187 return true;
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */
3194 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3196 if (a->esz == 0) {
3197 return false;
3199 if (sve_access_check(s)) {
3200 unsigned vsz = vec_full_reg_size(s);
3201 int dofs = vec_full_reg_offset(s, a->rd);
3202 uint64_t imm;
3204 /* Decode the VFP immediate. */
3205 imm = vfp_expand_imm(a->esz, a->imm);
3206 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3208 return true;
3211 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3213 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3214 return false;
3216 if (sve_access_check(s)) {
3217 unsigned vsz = vec_full_reg_size(s);
3218 int dofs = vec_full_reg_offset(s, a->rd);
3220 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3222 return true;
3225 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3227 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3228 return false;
3230 if (sve_access_check(s)) {
3231 unsigned vsz = vec_full_reg_size(s);
3232 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3233 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3235 return true;
3238 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3240 a->imm = -a->imm;
3241 return trans_ADD_zzi(s, a);
3244 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3246 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3247 static const GVecGen2s op[4] = {
3248 { .fni8 = tcg_gen_vec_sub8_i64,
3249 .fniv = tcg_gen_sub_vec,
3250 .fno = gen_helper_sve_subri_b,
3251 .opt_opc = vecop_list,
3252 .vece = MO_8,
3253 .scalar_first = true },
3254 { .fni8 = tcg_gen_vec_sub16_i64,
3255 .fniv = tcg_gen_sub_vec,
3256 .fno = gen_helper_sve_subri_h,
3257 .opt_opc = vecop_list,
3258 .vece = MO_16,
3259 .scalar_first = true },
3260 { .fni4 = tcg_gen_sub_i32,
3261 .fniv = tcg_gen_sub_vec,
3262 .fno = gen_helper_sve_subri_s,
3263 .opt_opc = vecop_list,
3264 .vece = MO_32,
3265 .scalar_first = true },
3266 { .fni8 = tcg_gen_sub_i64,
3267 .fniv = tcg_gen_sub_vec,
3268 .fno = gen_helper_sve_subri_d,
3269 .opt_opc = vecop_list,
3270 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3271 .vece = MO_64,
3272 .scalar_first = true }
3275 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3276 return false;
3278 if (sve_access_check(s)) {
3279 unsigned vsz = vec_full_reg_size(s);
3280 TCGv_i64 c = tcg_const_i64(a->imm);
3281 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3282 vec_full_reg_offset(s, a->rn),
3283 vsz, vsz, c, &op[a->esz]);
3284 tcg_temp_free_i64(c);
3286 return true;
3289 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3291 if (sve_access_check(s)) {
3292 unsigned vsz = vec_full_reg_size(s);
3293 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3294 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3296 return true;
3299 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3301 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3302 return false;
3304 if (sve_access_check(s)) {
3305 TCGv_i64 val = tcg_const_i64(a->imm);
3306 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3307 tcg_temp_free_i64(val);
3309 return true;
3312 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3314 return do_zzi_sat(s, a, false, false);
3317 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3319 return do_zzi_sat(s, a, true, false);
3322 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3324 return do_zzi_sat(s, a, false, true);
3327 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3329 return do_zzi_sat(s, a, true, true);
3332 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3334 if (sve_access_check(s)) {
3335 unsigned vsz = vec_full_reg_size(s);
3336 TCGv_i64 c = tcg_const_i64(a->imm);
3338 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3339 vec_full_reg_offset(s, a->rn),
3340 c, vsz, vsz, 0, fn);
3341 tcg_temp_free_i64(c);
3343 return true;
3346 #define DO_ZZI(NAME, name) \
3347 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
3349 static gen_helper_gvec_2i * const fns[4] = { \
3350 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3351 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3352 }; \
3353 return do_zzi_ool(s, a, fns[a->esz]); \
3356 DO_ZZI(SMAX, smax)
3357 DO_ZZI(UMAX, umax)
3358 DO_ZZI(SMIN, smin)
3359 DO_ZZI(UMIN, umin)
3361 #undef DO_ZZI
3363 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3365 static gen_helper_gvec_3 * const fns[2][2] = {
3366 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3367 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3370 if (sve_access_check(s)) {
3371 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
3373 return true;
3376 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3378 static gen_helper_gvec_3 * const fns[2][2] = {
3379 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3380 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3383 if (sve_access_check(s)) {
3384 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
3386 return true;
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */
3394 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3396 static gen_helper_gvec_4_ptr * const fns[3] = {
3397 gen_helper_gvec_fmla_idx_h,
3398 gen_helper_gvec_fmla_idx_s,
3399 gen_helper_gvec_fmla_idx_d,
3402 if (sve_access_check(s)) {
3403 unsigned vsz = vec_full_reg_size(s);
3404 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3405 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3406 vec_full_reg_offset(s, a->rn),
3407 vec_full_reg_offset(s, a->rm),
3408 vec_full_reg_offset(s, a->ra),
3409 status, vsz, vsz, (a->index << 1) | a->sub,
3410 fns[a->esz - 1]);
3411 tcg_temp_free_ptr(status);
3413 return true;
/*
 *** SVE Floating Point Multiply Indexed Group
 */
3420 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3422 static gen_helper_gvec_3_ptr * const fns[3] = {
3423 gen_helper_gvec_fmul_idx_h,
3424 gen_helper_gvec_fmul_idx_s,
3425 gen_helper_gvec_fmul_idx_d,
3428 if (sve_access_check(s)) {
3429 unsigned vsz = vec_full_reg_size(s);
3430 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3431 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3432 vec_full_reg_offset(s, a->rn),
3433 vec_full_reg_offset(s, a->rm),
3434 status, vsz, vsz, a->index, fns[a->esz - 1]);
3435 tcg_temp_free_ptr(status);
3437 return true;
/*
 *** SVE Floating Point Fast Reduction Group
 */
3444 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3445 TCGv_ptr, TCGv_i32);
3447 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3448 gen_helper_fp_reduce *fn)
3450 unsigned vsz = vec_full_reg_size(s);
3451 unsigned p2vsz = pow2ceil(vsz);
3452 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3453 TCGv_ptr t_zn, t_pg, status;
3454 TCGv_i64 temp;
3456 temp = tcg_temp_new_i64();
3457 t_zn = tcg_temp_new_ptr();
3458 t_pg = tcg_temp_new_ptr();
3460 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3461 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3462 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3464 fn(temp, t_zn, t_pg, status, t_desc);
3465 tcg_temp_free_ptr(t_zn);
3466 tcg_temp_free_ptr(t_pg);
3467 tcg_temp_free_ptr(status);
3468 tcg_temp_free_i32(t_desc);
3470 write_fp_dreg(s, a->rd, temp);
3471 tcg_temp_free_i64(temp);
3474 #define DO_VPZ(NAME, name) \
3475 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3477 static gen_helper_fp_reduce * const fns[3] = { \
3478 gen_helper_sve_##name##_h, \
3479 gen_helper_sve_##name##_s, \
3480 gen_helper_sve_##name##_d, \
3481 }; \
3482 if (a->esz == 0) { \
3483 return false; \
3485 if (sve_access_check(s)) { \
3486 do_reduce(s, a, fns[a->esz - 1]); \
3488 return true; \
3491 DO_VPZ(FADDV, faddv)
3492 DO_VPZ(FMINNMV, fminnmv)
3493 DO_VPZ(FMAXNMV, fmaxnmv)
3494 DO_VPZ(FMINV, fminv)
3495 DO_VPZ(FMAXV, fmaxv)
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */
3501 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3503 unsigned vsz = vec_full_reg_size(s);
3504 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3506 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3507 vec_full_reg_offset(s, a->rn),
3508 status, vsz, vsz, 0, fn);
3509 tcg_temp_free_ptr(status);
3512 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3514 static gen_helper_gvec_2_ptr * const fns[3] = {
3515 gen_helper_gvec_frecpe_h,
3516 gen_helper_gvec_frecpe_s,
3517 gen_helper_gvec_frecpe_d,
3519 if (a->esz == 0) {
3520 return false;
3522 if (sve_access_check(s)) {
3523 do_zz_fp(s, a, fns[a->esz - 1]);
3525 return true;
3528 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3530 static gen_helper_gvec_2_ptr * const fns[3] = {
3531 gen_helper_gvec_frsqrte_h,
3532 gen_helper_gvec_frsqrte_s,
3533 gen_helper_gvec_frsqrte_d,
3535 if (a->esz == 0) {
3536 return false;
3538 if (sve_access_check(s)) {
3539 do_zz_fp(s, a, fns[a->esz - 1]);
3541 return true;
/*
 *** SVE Floating Point Compare with Zero Group
 */
3548 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3549 gen_helper_gvec_3_ptr *fn)
3551 unsigned vsz = vec_full_reg_size(s);
3552 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3554 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3555 vec_full_reg_offset(s, a->rn),
3556 pred_full_reg_offset(s, a->pg),
3557 status, vsz, vsz, 0, fn);
3558 tcg_temp_free_ptr(status);
3561 #define DO_PPZ(NAME, name) \
3562 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3564 static gen_helper_gvec_3_ptr * const fns[3] = { \
3565 gen_helper_sve_##name##_h, \
3566 gen_helper_sve_##name##_s, \
3567 gen_helper_sve_##name##_d, \
3568 }; \
3569 if (a->esz == 0) { \
3570 return false; \
3572 if (sve_access_check(s)) { \
3573 do_ppz_fp(s, a, fns[a->esz - 1]); \
3575 return true; \
3578 DO_PPZ(FCMGE_ppz0, fcmge0)
3579 DO_PPZ(FCMGT_ppz0, fcmgt0)
3580 DO_PPZ(FCMLE_ppz0, fcmle0)
3581 DO_PPZ(FCMLT_ppz0, fcmlt0)
3582 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3583 DO_PPZ(FCMNE_ppz0, fcmne0)
3585 #undef DO_PPZ
/*
 *** SVE floating-point trig multiply-add coefficient
 */
3591 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3593 static gen_helper_gvec_3_ptr * const fns[3] = {
3594 gen_helper_sve_ftmad_h,
3595 gen_helper_sve_ftmad_s,
3596 gen_helper_sve_ftmad_d,
3599 if (a->esz == 0) {
3600 return false;
3602 if (sve_access_check(s)) {
3603 unsigned vsz = vec_full_reg_size(s);
3604 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3605 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3606 vec_full_reg_offset(s, a->rn),
3607 vec_full_reg_offset(s, a->rm),
3608 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3609 tcg_temp_free_ptr(status);
3611 return true;
/*
 *** SVE Floating Point Accumulating Reduction Group
 */
3618 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3620 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3621 TCGv_ptr, TCGv_ptr, TCGv_i32);
3622 static fadda_fn * const fns[3] = {
3623 gen_helper_sve_fadda_h,
3624 gen_helper_sve_fadda_s,
3625 gen_helper_sve_fadda_d,
3627 unsigned vsz = vec_full_reg_size(s);
3628 TCGv_ptr t_rm, t_pg, t_fpst;
3629 TCGv_i64 t_val;
3630 TCGv_i32 t_desc;
3632 if (a->esz == 0) {
3633 return false;
3635 if (!sve_access_check(s)) {
3636 return true;
3639 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3640 t_rm = tcg_temp_new_ptr();
3641 t_pg = tcg_temp_new_ptr();
3642 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3643 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3644 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3645 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3647 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3649 tcg_temp_free_i32(t_desc);
3650 tcg_temp_free_ptr(t_fpst);
3651 tcg_temp_free_ptr(t_pg);
3652 tcg_temp_free_ptr(t_rm);
3654 write_fp_dreg(s, a->rd, t_val);
3655 tcg_temp_free_i64(t_val);
3656 return true;
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */
3663 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3664 gen_helper_gvec_3_ptr *fn)
3666 if (fn == NULL) {
3667 return false;
3669 if (sve_access_check(s)) {
3670 unsigned vsz = vec_full_reg_size(s);
3671 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3672 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3673 vec_full_reg_offset(s, a->rn),
3674 vec_full_reg_offset(s, a->rm),
3675 status, vsz, vsz, 0, fn);
3676 tcg_temp_free_ptr(status);
3678 return true;
3682 #define DO_FP3(NAME, name) \
3683 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
3685 static gen_helper_gvec_3_ptr * const fns[4] = { \
3686 NULL, gen_helper_gvec_##name##_h, \
3687 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3688 }; \
3689 return do_zzz_fp(s, a, fns[a->esz]); \
3692 DO_FP3(FADD_zzz, fadd)
3693 DO_FP3(FSUB_zzz, fsub)
3694 DO_FP3(FMUL_zzz, fmul)
3695 DO_FP3(FTSMUL, ftsmul)
3696 DO_FP3(FRECPS, recps)
3697 DO_FP3(FRSQRTS, rsqrts)
3699 #undef DO_FP3
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */
3705 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3706 gen_helper_gvec_4_ptr *fn)
3708 if (fn == NULL) {
3709 return false;
3711 if (sve_access_check(s)) {
3712 unsigned vsz = vec_full_reg_size(s);
3713 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3714 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3715 vec_full_reg_offset(s, a->rn),
3716 vec_full_reg_offset(s, a->rm),
3717 pred_full_reg_offset(s, a->pg),
3718 status, vsz, vsz, 0, fn);
3719 tcg_temp_free_ptr(status);
3721 return true;
3724 #define DO_FP3(NAME, name) \
3725 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
3727 static gen_helper_gvec_4_ptr * const fns[4] = { \
3728 NULL, gen_helper_sve_##name##_h, \
3729 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3730 }; \
3731 return do_zpzz_fp(s, a, fns[a->esz]); \
3734 DO_FP3(FADD_zpzz, fadd)
3735 DO_FP3(FSUB_zpzz, fsub)
3736 DO_FP3(FMUL_zpzz, fmul)
3737 DO_FP3(FMIN_zpzz, fmin)
3738 DO_FP3(FMAX_zpzz, fmax)
3739 DO_FP3(FMINNM_zpzz, fminnum)
3740 DO_FP3(FMAXNM_zpzz, fmaxnum)
3741 DO_FP3(FABD, fabd)
3742 DO_FP3(FSCALE, fscalbn)
3743 DO_FP3(FDIV, fdiv)
3744 DO_FP3(FMULX, fmulx)
3746 #undef DO_FP3
3748 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3749 TCGv_i64, TCGv_ptr, TCGv_i32);
3751 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3752 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3754 unsigned vsz = vec_full_reg_size(s);
3755 TCGv_ptr t_zd, t_zn, t_pg, status;
3756 TCGv_i32 desc;
3758 t_zd = tcg_temp_new_ptr();
3759 t_zn = tcg_temp_new_ptr();
3760 t_pg = tcg_temp_new_ptr();
3761 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3762 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3763 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3765 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
3766 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3767 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3769 tcg_temp_free_i32(desc);
3770 tcg_temp_free_ptr(status);
3771 tcg_temp_free_ptr(t_pg);
3772 tcg_temp_free_ptr(t_zn);
3773 tcg_temp_free_ptr(t_zd);
3776 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3777 gen_helper_sve_fp2scalar *fn)
3779 TCGv_i64 temp = tcg_const_i64(imm);
3780 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3781 tcg_temp_free_i64(temp);
3784 #define DO_FP_IMM(NAME, name, const0, const1) \
3785 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
3787 static gen_helper_sve_fp2scalar * const fns[3] = { \
3788 gen_helper_sve_##name##_h, \
3789 gen_helper_sve_##name##_s, \
3790 gen_helper_sve_##name##_d \
3791 }; \
3792 static uint64_t const val[3][2] = { \
3793 { float16_##const0, float16_##const1 }, \
3794 { float32_##const0, float32_##const1 }, \
3795 { float64_##const0, float64_##const1 }, \
3796 }; \
3797 if (a->esz == 0) { \
3798 return false; \
3800 if (sve_access_check(s)) { \
3801 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3803 return true; \
3806 DO_FP_IMM(FADD, fadds, half, one)
3807 DO_FP_IMM(FSUB, fsubs, half, one)
3808 DO_FP_IMM(FMUL, fmuls, half, two)
3809 DO_FP_IMM(FSUBR, fsubrs, half, one)
3810 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3811 DO_FP_IMM(FMINNM, fminnms, zero, one)
3812 DO_FP_IMM(FMAX, fmaxs, zero, one)
3813 DO_FP_IMM(FMIN, fmins, zero, one)
3815 #undef DO_FP_IMM
3817 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3818 gen_helper_gvec_4_ptr *fn)
3820 if (fn == NULL) {
3821 return false;
3823 if (sve_access_check(s)) {
3824 unsigned vsz = vec_full_reg_size(s);
3825 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3826 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3827 vec_full_reg_offset(s, a->rn),
3828 vec_full_reg_offset(s, a->rm),
3829 pred_full_reg_offset(s, a->pg),
3830 status, vsz, vsz, 0, fn);
3831 tcg_temp_free_ptr(status);
3833 return true;
3836 #define DO_FPCMP(NAME, name) \
3837 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
3839 static gen_helper_gvec_4_ptr * const fns[4] = { \
3840 NULL, gen_helper_sve_##name##_h, \
3841 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3842 }; \
3843 return do_fp_cmp(s, a, fns[a->esz]); \
3846 DO_FPCMP(FCMGE, fcmge)
3847 DO_FPCMP(FCMGT, fcmgt)
3848 DO_FPCMP(FCMEQ, fcmeq)
3849 DO_FPCMP(FCMNE, fcmne)
3850 DO_FPCMP(FCMUO, fcmuo)
3851 DO_FPCMP(FACGE, facge)
3852 DO_FPCMP(FACGT, facgt)
3854 #undef DO_FPCMP
3856 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3858 static gen_helper_gvec_4_ptr * const fns[3] = {
3859 gen_helper_sve_fcadd_h,
3860 gen_helper_sve_fcadd_s,
3861 gen_helper_sve_fcadd_d
3864 if (a->esz == 0) {
3865 return false;
3867 if (sve_access_check(s)) {
3868 unsigned vsz = vec_full_reg_size(s);
3869 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3870 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3871 vec_full_reg_offset(s, a->rn),
3872 vec_full_reg_offset(s, a->rm),
3873 pred_full_reg_offset(s, a->pg),
3874 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3875 tcg_temp_free_ptr(status);
3877 return true;
3880 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3881 gen_helper_gvec_5_ptr *fn)
3883 if (a->esz == 0) {
3884 return false;
3886 if (sve_access_check(s)) {
3887 unsigned vsz = vec_full_reg_size(s);
3888 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3889 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3890 vec_full_reg_offset(s, a->rn),
3891 vec_full_reg_offset(s, a->rm),
3892 vec_full_reg_offset(s, a->ra),
3893 pred_full_reg_offset(s, a->pg),
3894 status, vsz, vsz, 0, fn);
3895 tcg_temp_free_ptr(status);
3897 return true;
3900 #define DO_FMLA(NAME, name) \
3901 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
3903 static gen_helper_gvec_5_ptr * const fns[4] = { \
3904 NULL, gen_helper_sve_##name##_h, \
3905 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3906 }; \
3907 return do_fmla(s, a, fns[a->esz]); \
3910 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3911 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3912 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3913 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3915 #undef DO_FMLA
3917 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
3919 static gen_helper_gvec_5_ptr * const fns[4] = {
3920 NULL,
3921 gen_helper_sve_fcmla_zpzzz_h,
3922 gen_helper_sve_fcmla_zpzzz_s,
3923 gen_helper_sve_fcmla_zpzzz_d,
3926 if (a->esz == 0) {
3927 return false;
3929 if (sve_access_check(s)) {
3930 unsigned vsz = vec_full_reg_size(s);
3931 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3932 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3933 vec_full_reg_offset(s, a->rn),
3934 vec_full_reg_offset(s, a->rm),
3935 vec_full_reg_offset(s, a->ra),
3936 pred_full_reg_offset(s, a->pg),
3937 status, vsz, vsz, a->rot, fns[a->esz]);
3938 tcg_temp_free_ptr(status);
3940 return true;
3943 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
3945 static gen_helper_gvec_3_ptr * const fns[2] = {
3946 gen_helper_gvec_fcmlah_idx,
3947 gen_helper_gvec_fcmlas_idx,
3950 tcg_debug_assert(a->esz == 1 || a->esz == 2);
3951 tcg_debug_assert(a->rd == a->ra);
3952 if (sve_access_check(s)) {
3953 unsigned vsz = vec_full_reg_size(s);
3954 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3955 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3956 vec_full_reg_offset(s, a->rn),
3957 vec_full_reg_offset(s, a->rm),
3958 status, vsz, vsz,
3959 a->index * 4 + a->rot,
3960 fns[a->esz - 1]);
3961 tcg_temp_free_ptr(status);
3963 return true;
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */
3970 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3971 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3973 if (sve_access_check(s)) {
3974 unsigned vsz = vec_full_reg_size(s);
3975 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
3976 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3977 vec_full_reg_offset(s, rn),
3978 pred_full_reg_offset(s, pg),
3979 status, vsz, vsz, 0, fn);
3980 tcg_temp_free_ptr(status);
3982 return true;
3985 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
3987 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
3990 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
3992 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
3995 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
3997 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4000 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4002 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4005 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4007 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4010 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4012 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4015 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4017 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4020 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4022 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4025 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4027 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4030 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4032 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4035 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4037 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4040 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4042 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4045 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4047 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4050 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4052 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4055 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4057 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4060 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4062 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4065 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4067 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4070 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4072 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4075 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4077 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4080 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4082 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4085 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4086 gen_helper_sve_frint_h,
4087 gen_helper_sve_frint_s,
4088 gen_helper_sve_frint_d
4091 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4093 if (a->esz == 0) {
4094 return false;
4096 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4097 frint_fns[a->esz - 1]);
4100 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4102 static gen_helper_gvec_3_ptr * const fns[3] = {
4103 gen_helper_sve_frintx_h,
4104 gen_helper_sve_frintx_s,
4105 gen_helper_sve_frintx_d
4107 if (a->esz == 0) {
4108 return false;
4110 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4113 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4115 if (a->esz == 0) {
4116 return false;
4118 if (sve_access_check(s)) {
4119 unsigned vsz = vec_full_reg_size(s);
4120 TCGv_i32 tmode = tcg_const_i32(mode);
4121 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4123 gen_helper_set_rmode(tmode, tmode, status);
4125 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4126 vec_full_reg_offset(s, a->rn),
4127 pred_full_reg_offset(s, a->pg),
4128 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4130 gen_helper_set_rmode(tmode, tmode, status);
4131 tcg_temp_free_i32(tmode);
4132 tcg_temp_free_ptr(status);
4134 return true;
4137 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4139 return do_frint_mode(s, a, float_round_nearest_even);
4142 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4144 return do_frint_mode(s, a, float_round_up);
4147 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4149 return do_frint_mode(s, a, float_round_down);
4152 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4154 return do_frint_mode(s, a, float_round_to_zero);
4157 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4159 return do_frint_mode(s, a, float_round_ties_away);
4162 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4164 static gen_helper_gvec_3_ptr * const fns[3] = {
4165 gen_helper_sve_frecpx_h,
4166 gen_helper_sve_frecpx_s,
4167 gen_helper_sve_frecpx_d
4169 if (a->esz == 0) {
4170 return false;
4172 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4175 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4177 static gen_helper_gvec_3_ptr * const fns[3] = {
4178 gen_helper_sve_fsqrt_h,
4179 gen_helper_sve_fsqrt_s,
4180 gen_helper_sve_fsqrt_d
4182 if (a->esz == 0) {
4183 return false;
4185 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4188 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4190 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4193 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4195 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4198 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4200 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4203 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4205 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4208 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4210 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4213 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4215 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4218 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4220 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4223 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4225 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4228 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4230 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4233 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4235 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4238 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4240 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4243 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4245 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4248 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4250 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4253 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4255 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
4262 /* Subroutine loading a vector register at VOFS of LEN bytes.
4263 * The load should begin at the address Rn + IMM.
4266 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4268 int len_align = QEMU_ALIGN_DOWN(len, 8);
4269 int len_remain = len % 8;
4270 int nparts = len / 8 + ctpop8(len_remain);
4271 int midx = get_mem_index(s);
4272 TCGv_i64 dirty_addr, clean_addr, t0, t1;
4274 dirty_addr = tcg_temp_new_i64();
4275 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4276 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4277 tcg_temp_free_i64(dirty_addr);
4280 * Note that unpredicated load/store of vector/predicate registers
4281 * are defined as a stream of bytes, which equates to little-endian
4282 * operations on larger quantities.
4283 * Attempt to keep code expansion to a minimum by limiting the
4284 * amount of unrolling done.
4286 if (nparts <= 4) {
4287 int i;
4289 t0 = tcg_temp_new_i64();
4290 for (i = 0; i < len_align; i += 8) {
4291 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4292 tcg_gen_st_i64(t0, cpu_env, vofs + i);
4293 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4295 tcg_temp_free_i64(t0);
4296 } else {
4297 TCGLabel *loop = gen_new_label();
4298 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4300 /* Copy the clean address into a local temp, live across the loop. */
4301 t0 = clean_addr;
4302 clean_addr = new_tmp_a64_local(s);
4303 tcg_gen_mov_i64(clean_addr, t0);
4305 gen_set_label(loop);
4307 t0 = tcg_temp_new_i64();
4308 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4309 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4311 tp = tcg_temp_new_ptr();
4312 tcg_gen_add_ptr(tp, cpu_env, i);
4313 tcg_gen_addi_ptr(i, i, 8);
4314 tcg_gen_st_i64(t0, tp, vofs);
4315 tcg_temp_free_ptr(tp);
4316 tcg_temp_free_i64(t0);
4318 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4319 tcg_temp_free_ptr(i);
4323 * Predicate register loads can be any multiple of 2.
4324 * Note that we still store the entire 64-bit unit into cpu_env.
4326 if (len_remain) {
4327 t0 = tcg_temp_new_i64();
4328 switch (len_remain) {
4329 case 2:
4330 case 4:
4331 case 8:
4332 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4333 MO_LE | ctz32(len_remain));
4334 break;
4336 case 6:
4337 t1 = tcg_temp_new_i64();
4338 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4339 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4340 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
4341 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4342 tcg_temp_free_i64(t1);
4343 break;
4345 default:
4346 g_assert_not_reached();
4348 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4349 tcg_temp_free_i64(t0);
4353 /* Similarly for stores. */
4354 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4356 int len_align = QEMU_ALIGN_DOWN(len, 8);
4357 int len_remain = len % 8;
4358 int nparts = len / 8 + ctpop8(len_remain);
4359 int midx = get_mem_index(s);
4360 TCGv_i64 dirty_addr, clean_addr, t0;
4362 dirty_addr = tcg_temp_new_i64();
4363 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4364 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4365 tcg_temp_free_i64(dirty_addr);
4367 /* Note that unpredicated load/store of vector/predicate registers
4368 * are defined as a stream of bytes, which equates to little-endian
4369 * operations on larger quantities. There is no nice way to force
4370 * a little-endian store for aarch64_be-linux-user out of line.
4372 * Attempt to keep code expansion to a minimum by limiting the
4373 * amount of unrolling done.
4375 if (nparts <= 4) {
4376 int i;
4378 t0 = tcg_temp_new_i64();
4379 for (i = 0; i < len_align; i += 8) {
4380 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4381 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4382 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4384 tcg_temp_free_i64(t0);
4385 } else {
4386 TCGLabel *loop = gen_new_label();
4387 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4389 /* Copy the clean address into a local temp, live across the loop. */
4390 t0 = clean_addr;
4391 clean_addr = new_tmp_a64_local(s);
4392 tcg_gen_mov_i64(clean_addr, t0);
4394 gen_set_label(loop);
4396 t0 = tcg_temp_new_i64();
4397 tp = tcg_temp_new_ptr();
4398 tcg_gen_add_ptr(tp, cpu_env, i);
4399 tcg_gen_ld_i64(t0, tp, vofs);
4400 tcg_gen_addi_ptr(i, i, 8);
4401 tcg_temp_free_ptr(tp);
4403 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4404 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4405 tcg_temp_free_i64(t0);
4407 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4408 tcg_temp_free_ptr(i);
4411 /* Predicate register stores can be any multiple of 2. */
4412 if (len_remain) {
4413 t0 = tcg_temp_new_i64();
4414 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4416 switch (len_remain) {
4417 case 2:
4418 case 4:
4419 case 8:
4420 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4421 MO_LE | ctz32(len_remain));
4422 break;
4424 case 6:
4425 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4426 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4427 tcg_gen_shri_i64(t0, t0, 32);
4428 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
4429 break;
4431 default:
4432 g_assert_not_reached();
4434 tcg_temp_free_i64(t0);
4438 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4440 if (sve_access_check(s)) {
4441 int size = vec_full_reg_size(s);
4442 int off = vec_full_reg_offset(s, a->rd);
4443 do_ldr(s, off, size, a->rn, a->imm * size);
4445 return true;
4448 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4450 if (sve_access_check(s)) {
4451 int size = pred_full_reg_size(s);
4452 int off = pred_full_reg_offset(s, a->rd);
4453 do_ldr(s, off, size, a->rn, a->imm * size);
4455 return true;
4458 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4460 if (sve_access_check(s)) {
4461 int size = vec_full_reg_size(s);
4462 int off = vec_full_reg_offset(s, a->rd);
4463 do_str(s, off, size, a->rn, a->imm * size);
4465 return true;
4468 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4470 if (sve_access_check(s)) {
4471 int size = pred_full_reg_size(s);
4472 int off = pred_full_reg_offset(s, a->rd);
4473 do_str(s, off, size, a->rn, a->imm * size);
4475 return true;
/*
 *** SVE Memory - Contiguous Load Group
 */
4482 /* The memory mode of the dtype. */
4483 static const MemOp dtype_mop[16] = {
4484 MO_UB, MO_UB, MO_UB, MO_UB,
4485 MO_SL, MO_UW, MO_UW, MO_UW,
4486 MO_SW, MO_SW, MO_UL, MO_UL,
4487 MO_SB, MO_SB, MO_SB, MO_Q
4490 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4492 /* The vector element size of dtype. */
4493 static const uint8_t dtype_esz[16] = {
4494 0, 1, 2, 3,
4495 3, 1, 2, 3,
4496 3, 2, 2, 3,
4497 3, 2, 1, 3
4500 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4501 int dtype, uint32_t mte_n, bool is_write,
4502 gen_helper_gvec_mem *fn)
4504 unsigned vsz = vec_full_reg_size(s);
4505 TCGv_ptr t_pg;
4506 TCGv_i32 t_desc;
4507 int desc = 0;
4510 * For e.g. LD4, there are not enough arguments to pass all 4
4511 * registers as pointers, so encode the regno into the data field.
4512 * For consistency, do this even for LD1.
4514 if (s->mte_active[0]) {
4515 int msz = dtype_msz(dtype);
4517 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4518 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4519 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4520 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4521 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
4522 desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
4523 desc <<= SVE_MTEDESC_SHIFT;
4524 } else {
4525 addr = clean_data_tbi(s, addr);
4528 desc = simd_desc(vsz, vsz, zt | desc);
4529 t_desc = tcg_const_i32(desc);
4530 t_pg = tcg_temp_new_ptr();
4532 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4533 fn(cpu_env, t_pg, addr, t_desc);
4535 tcg_temp_free_ptr(t_pg);
4536 tcg_temp_free_i32(t_desc);
4539 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4540 TCGv_i64 addr, int dtype, int nreg)
4542 static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4543 { /* mte inactive, little-endian */
4544 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4545 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4546 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4547 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4548 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4550 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4551 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4552 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4553 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4554 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4556 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4557 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4558 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4559 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4560 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4562 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4563 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4564 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4565 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4566 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4568 /* mte inactive, big-endian */
4569 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4570 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4571 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4572 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4573 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4575 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4576 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4577 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4578 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4579 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4581 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4582 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4583 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4584 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4585 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4587 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4588 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4589 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4590 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4591 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4593 { /* mte active, little-endian */
4594 { { gen_helper_sve_ld1bb_r_mte,
4595 gen_helper_sve_ld2bb_r_mte,
4596 gen_helper_sve_ld3bb_r_mte,
4597 gen_helper_sve_ld4bb_r_mte },
4598 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4599 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4600 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4602 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4603 { gen_helper_sve_ld1hh_le_r_mte,
4604 gen_helper_sve_ld2hh_le_r_mte,
4605 gen_helper_sve_ld3hh_le_r_mte,
4606 gen_helper_sve_ld4hh_le_r_mte },
4607 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4608 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4610 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4611 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4612 { gen_helper_sve_ld1ss_le_r_mte,
4613 gen_helper_sve_ld2ss_le_r_mte,
4614 gen_helper_sve_ld3ss_le_r_mte,
4615 gen_helper_sve_ld4ss_le_r_mte },
4616 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4618 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4619 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4620 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4621 { gen_helper_sve_ld1dd_le_r_mte,
4622 gen_helper_sve_ld2dd_le_r_mte,
4623 gen_helper_sve_ld3dd_le_r_mte,
4624 gen_helper_sve_ld4dd_le_r_mte } },
4626 /* mte active, big-endian */
4627 { { gen_helper_sve_ld1bb_r_mte,
4628 gen_helper_sve_ld2bb_r_mte,
4629 gen_helper_sve_ld3bb_r_mte,
4630 gen_helper_sve_ld4bb_r_mte },
4631 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4632 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4633 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4635 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4636 { gen_helper_sve_ld1hh_be_r_mte,
4637 gen_helper_sve_ld2hh_be_r_mte,
4638 gen_helper_sve_ld3hh_be_r_mte,
4639 gen_helper_sve_ld4hh_be_r_mte },
4640 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4641 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4643 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4644 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4645 { gen_helper_sve_ld1ss_be_r_mte,
4646 gen_helper_sve_ld2ss_be_r_mte,
4647 gen_helper_sve_ld3ss_be_r_mte,
4648 gen_helper_sve_ld4ss_be_r_mte },
4649 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4651 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4652 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4653 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4654 { gen_helper_sve_ld1dd_be_r_mte,
4655 gen_helper_sve_ld2dd_be_r_mte,
4656 gen_helper_sve_ld3dd_be_r_mte,
4657 gen_helper_sve_ld4dd_be_r_mte } } },
4659 gen_helper_gvec_mem *fn
4660 = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
4663 * While there are holes in the table, they are not
4664 * accessible via the instruction encoding.
4666 assert(fn != NULL);
4667 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
4670 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4672 if (a->rm == 31) {
4673 return false;
4675 if (sve_access_check(s)) {
4676 TCGv_i64 addr = new_tmp_a64(s);
4677 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4678 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4679 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4681 return true;
4684 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4686 if (sve_access_check(s)) {
4687 int vsz = vec_full_reg_size(s);
4688 int elements = vsz >> dtype_esz[a->dtype];
4689 TCGv_i64 addr = new_tmp_a64(s);
4691 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4692 (a->imm * elements * (a->nreg + 1))
4693 << dtype_msz(a->dtype));
4694 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4696 return true;
4699 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4701 static gen_helper_gvec_mem * const fns[2][2][16] = {
4702 { /* mte inactive, little-endian */
4703 { gen_helper_sve_ldff1bb_r,
4704 gen_helper_sve_ldff1bhu_r,
4705 gen_helper_sve_ldff1bsu_r,
4706 gen_helper_sve_ldff1bdu_r,
4708 gen_helper_sve_ldff1sds_le_r,
4709 gen_helper_sve_ldff1hh_le_r,
4710 gen_helper_sve_ldff1hsu_le_r,
4711 gen_helper_sve_ldff1hdu_le_r,
4713 gen_helper_sve_ldff1hds_le_r,
4714 gen_helper_sve_ldff1hss_le_r,
4715 gen_helper_sve_ldff1ss_le_r,
4716 gen_helper_sve_ldff1sdu_le_r,
4718 gen_helper_sve_ldff1bds_r,
4719 gen_helper_sve_ldff1bss_r,
4720 gen_helper_sve_ldff1bhs_r,
4721 gen_helper_sve_ldff1dd_le_r },
4723 /* mte inactive, big-endian */
4724 { gen_helper_sve_ldff1bb_r,
4725 gen_helper_sve_ldff1bhu_r,
4726 gen_helper_sve_ldff1bsu_r,
4727 gen_helper_sve_ldff1bdu_r,
4729 gen_helper_sve_ldff1sds_be_r,
4730 gen_helper_sve_ldff1hh_be_r,
4731 gen_helper_sve_ldff1hsu_be_r,
4732 gen_helper_sve_ldff1hdu_be_r,
4734 gen_helper_sve_ldff1hds_be_r,
4735 gen_helper_sve_ldff1hss_be_r,
4736 gen_helper_sve_ldff1ss_be_r,
4737 gen_helper_sve_ldff1sdu_be_r,
4739 gen_helper_sve_ldff1bds_r,
4740 gen_helper_sve_ldff1bss_r,
4741 gen_helper_sve_ldff1bhs_r,
4742 gen_helper_sve_ldff1dd_be_r } },
4744 { /* mte active, little-endian */
4745 { gen_helper_sve_ldff1bb_r_mte,
4746 gen_helper_sve_ldff1bhu_r_mte,
4747 gen_helper_sve_ldff1bsu_r_mte,
4748 gen_helper_sve_ldff1bdu_r_mte,
4750 gen_helper_sve_ldff1sds_le_r_mte,
4751 gen_helper_sve_ldff1hh_le_r_mte,
4752 gen_helper_sve_ldff1hsu_le_r_mte,
4753 gen_helper_sve_ldff1hdu_le_r_mte,
4755 gen_helper_sve_ldff1hds_le_r_mte,
4756 gen_helper_sve_ldff1hss_le_r_mte,
4757 gen_helper_sve_ldff1ss_le_r_mte,
4758 gen_helper_sve_ldff1sdu_le_r_mte,
4760 gen_helper_sve_ldff1bds_r_mte,
4761 gen_helper_sve_ldff1bss_r_mte,
4762 gen_helper_sve_ldff1bhs_r_mte,
4763 gen_helper_sve_ldff1dd_le_r_mte },
4765 /* mte active, big-endian */
4766 { gen_helper_sve_ldff1bb_r_mte,
4767 gen_helper_sve_ldff1bhu_r_mte,
4768 gen_helper_sve_ldff1bsu_r_mte,
4769 gen_helper_sve_ldff1bdu_r_mte,
4771 gen_helper_sve_ldff1sds_be_r_mte,
4772 gen_helper_sve_ldff1hh_be_r_mte,
4773 gen_helper_sve_ldff1hsu_be_r_mte,
4774 gen_helper_sve_ldff1hdu_be_r_mte,
4776 gen_helper_sve_ldff1hds_be_r_mte,
4777 gen_helper_sve_ldff1hss_be_r_mte,
4778 gen_helper_sve_ldff1ss_be_r_mte,
4779 gen_helper_sve_ldff1sdu_be_r_mte,
4781 gen_helper_sve_ldff1bds_r_mte,
4782 gen_helper_sve_ldff1bss_r_mte,
4783 gen_helper_sve_ldff1bhs_r_mte,
4784 gen_helper_sve_ldff1dd_be_r_mte } },
4787 if (sve_access_check(s)) {
4788 TCGv_i64 addr = new_tmp_a64(s);
4789 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4790 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4791 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4792 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4794 return true;
/*
 * Translate LDNF1 with an immediate offset.
 *
 * The helper is picked from fns[] indexed by
 * [s->mte_active[0]][s->be_data == MO_BE][a->dtype]; the second outer
 * group holds the *_mte variants.  The function always returns true;
 * nothing is emitted when sve_access_check() fails.
 */
4797 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4799 static gen_helper_gvec_mem * const fns[2][2][16] = {
4800 { /* mte inactive, little-endian */
4801 { gen_helper_sve_ldnf1bb_r,
4802 gen_helper_sve_ldnf1bhu_r,
4803 gen_helper_sve_ldnf1bsu_r,
4804 gen_helper_sve_ldnf1bdu_r,
4806 gen_helper_sve_ldnf1sds_le_r,
4807 gen_helper_sve_ldnf1hh_le_r,
4808 gen_helper_sve_ldnf1hsu_le_r,
4809 gen_helper_sve_ldnf1hdu_le_r,
4811 gen_helper_sve_ldnf1hds_le_r,
4812 gen_helper_sve_ldnf1hss_le_r,
4813 gen_helper_sve_ldnf1ss_le_r,
4814 gen_helper_sve_ldnf1sdu_le_r,
4816 gen_helper_sve_ldnf1bds_r,
4817 gen_helper_sve_ldnf1bss_r,
4818 gen_helper_sve_ldnf1bhs_r,
4819 gen_helper_sve_ldnf1dd_le_r },
4821 /* mte inactive, big-endian */
4822 { gen_helper_sve_ldnf1bb_r,
4823 gen_helper_sve_ldnf1bhu_r,
4824 gen_helper_sve_ldnf1bsu_r,
4825 gen_helper_sve_ldnf1bdu_r,
4827 gen_helper_sve_ldnf1sds_be_r,
4828 gen_helper_sve_ldnf1hh_be_r,
4829 gen_helper_sve_ldnf1hsu_be_r,
4830 gen_helper_sve_ldnf1hdu_be_r,
4832 gen_helper_sve_ldnf1hds_be_r,
4833 gen_helper_sve_ldnf1hss_be_r,
4834 gen_helper_sve_ldnf1ss_be_r,
4835 gen_helper_sve_ldnf1sdu_be_r,
4837 gen_helper_sve_ldnf1bds_r,
4838 gen_helper_sve_ldnf1bss_r,
4839 gen_helper_sve_ldnf1bhs_r,
4840 gen_helper_sve_ldnf1dd_be_r } },
4842 { /* mte active, little-endian */
4843 { gen_helper_sve_ldnf1bb_r_mte,
4844 gen_helper_sve_ldnf1bhu_r_mte,
4845 gen_helper_sve_ldnf1bsu_r_mte,
4846 gen_helper_sve_ldnf1bdu_r_mte,
4848 gen_helper_sve_ldnf1sds_le_r_mte,
4849 gen_helper_sve_ldnf1hh_le_r_mte,
4850 gen_helper_sve_ldnf1hsu_le_r_mte,
4851 gen_helper_sve_ldnf1hdu_le_r_mte,
4853 gen_helper_sve_ldnf1hds_le_r_mte,
4854 gen_helper_sve_ldnf1hss_le_r_mte,
4855 gen_helper_sve_ldnf1ss_le_r_mte,
4856 gen_helper_sve_ldnf1sdu_le_r_mte,
4858 gen_helper_sve_ldnf1bds_r_mte,
4859 gen_helper_sve_ldnf1bss_r_mte,
4860 gen_helper_sve_ldnf1bhs_r_mte,
4861 gen_helper_sve_ldnf1dd_le_r_mte },
4863 /* mte active, big-endian */
4864 { gen_helper_sve_ldnf1bb_r_mte,
4865 gen_helper_sve_ldnf1bhu_r_mte,
4866 gen_helper_sve_ldnf1bsu_r_mte,
4867 gen_helper_sve_ldnf1bdu_r_mte,
4869 gen_helper_sve_ldnf1sds_be_r_mte,
4870 gen_helper_sve_ldnf1hh_be_r_mte,
4871 gen_helper_sve_ldnf1hsu_be_r_mte,
4872 gen_helper_sve_ldnf1hdu_be_r_mte,
4874 gen_helper_sve_ldnf1hds_be_r_mte,
4875 gen_helper_sve_ldnf1hss_be_r_mte,
4876 gen_helper_sve_ldnf1ss_be_r_mte,
4877 gen_helper_sve_ldnf1sdu_be_r_mte,
4879 gen_helper_sve_ldnf1bds_r_mte,
4880 gen_helper_sve_ldnf1bss_r_mte,
4881 gen_helper_sve_ldnf1bhs_r_mte,
4882 gen_helper_sve_ldnf1dd_be_r_mte } },
/*
 * The immediate counts whole vectors of memory elements: the byte
 * offset is imm * (number of elements per vector) * (memory element
 * size), added to the base register obtained via cpu_reg_sp().
 */
4885 if (sve_access_check(s)) {
4886 int vsz = vec_full_reg_size(s);
4887 int elements = vsz >> dtype_esz[a->dtype];
4888 int off = (a->imm * elements) << dtype_msz(a->dtype);
4889 TCGv_i64 addr = new_tmp_a64(s);
4891 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4892 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4893 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4895 return true;
4898 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4900 static gen_helper_gvec_mem * const fns[2][4] = {
4901 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4902 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4903 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4904 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4906 unsigned vsz = vec_full_reg_size(s);
4907 TCGv_ptr t_pg;
4908 TCGv_i32 t_desc;
4909 int desc, poff;
4911 /* Load the first quadword using the normal predicated load helpers. */
4912 desc = simd_desc(16, 16, zt);
4913 t_desc = tcg_const_i32(desc);
4915 poff = pred_full_reg_offset(s, pg);
4916 if (vsz > 16) {
4918 * Zero-extend the first 16 bits of the predicate into a temporary.
4919 * This avoids triggering an assert making sure we don't have bits
4920 * set within a predicate beyond VQ, but we have lowered VQ to 1
4921 * for this load operation.
4923 TCGv_i64 tmp = tcg_temp_new_i64();
4924 #ifdef HOST_WORDS_BIGENDIAN
4925 poff += 6;
4926 #endif
4927 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4929 poff = offsetof(CPUARMState, vfp.preg_tmp);
4930 tcg_gen_st_i64(tmp, cpu_env, poff);
4931 tcg_temp_free_i64(tmp);
4934 t_pg = tcg_temp_new_ptr();
4935 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4937 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4939 tcg_temp_free_ptr(t_pg);
4940 tcg_temp_free_i32(t_desc);
4942 /* Replicate that first quadword. */
4943 if (vsz > 16) {
4944 unsigned dofs = vec_full_reg_offset(s, zt);
4945 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4949 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4951 if (a->rm == 31) {
4952 return false;
4954 if (sve_access_check(s)) {
4955 int msz = dtype_msz(a->dtype);
4956 TCGv_i64 addr = new_tmp_a64(s);
4957 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4958 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4959 do_ldrq(s, a->rd, a->pg, addr, msz);
4961 return true;
4964 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4966 if (sve_access_check(s)) {
4967 TCGv_i64 addr = new_tmp_a64(s);
4968 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4969 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4971 return true;
4974 /* Load and broadcast element. */
4975 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4977 unsigned vsz = vec_full_reg_size(s);
4978 unsigned psz = pred_full_reg_size(s);
4979 unsigned esz = dtype_esz[a->dtype];
4980 unsigned msz = dtype_msz(a->dtype);
4981 TCGLabel *over;
4982 TCGv_i64 temp, clean_addr;
4984 if (!sve_access_check(s)) {
4985 return true;
4988 over = gen_new_label();
4990 /* If the guarding predicate has no bits set, no load occurs. */
4991 if (psz <= 8) {
4992 /* Reduce the pred_esz_masks value simply to reduce the
4993 * size of the code generated here.
4995 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4996 temp = tcg_temp_new_i64();
4997 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4998 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4999 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5000 tcg_temp_free_i64(temp);
5001 } else {
5002 TCGv_i32 t32 = tcg_temp_new_i32();
5003 find_last_active(s, t32, esz, a->pg);
5004 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5005 tcg_temp_free_i32(t32);
5008 /* Load the data. */
5009 temp = tcg_temp_new_i64();
5010 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
5011 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5013 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
5014 s->be_data | dtype_mop[a->dtype]);
5016 /* Broadcast to *all* elements. */
5017 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5018 vsz, vsz, temp);
5019 tcg_temp_free_i64(temp);
5021 /* Zero the inactive elements. */
5022 gen_set_label(over);
5023 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
/*
 * Emit a contiguous predicated store of register(s) starting at ZT to
 * ADDR under predicate PG.
 *
 * nreg == 0 selects a single-register store (ST1) from fn_single,
 * indexed by [mte][be][msz][esz]; the NULL slots are the combinations
 * with msz > esz.  Any other nreg selects fn_multiple, indexed by
 * [mte][be][nreg - 1][msz], and requires msz == esz.  The chosen
 * helper is asserted to be non-NULL and handed to do_mem_zpa() with
 * is_write = true.
 */
5026 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5027 int msz, int esz, int nreg)
/* First half: MTE inactive (little-endian, then big-endian). */
5029 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5030 { { { gen_helper_sve_st1bb_r,
5031 gen_helper_sve_st1bh_r,
5032 gen_helper_sve_st1bs_r,
5033 gen_helper_sve_st1bd_r },
5034 { NULL,
5035 gen_helper_sve_st1hh_le_r,
5036 gen_helper_sve_st1hs_le_r,
5037 gen_helper_sve_st1hd_le_r },
5038 { NULL, NULL,
5039 gen_helper_sve_st1ss_le_r,
5040 gen_helper_sve_st1sd_le_r },
5041 { NULL, NULL, NULL,
5042 gen_helper_sve_st1dd_le_r } },
5043 { { gen_helper_sve_st1bb_r,
5044 gen_helper_sve_st1bh_r,
5045 gen_helper_sve_st1bs_r,
5046 gen_helper_sve_st1bd_r },
5047 { NULL,
5048 gen_helper_sve_st1hh_be_r,
5049 gen_helper_sve_st1hs_be_r,
5050 gen_helper_sve_st1hd_be_r },
5051 { NULL, NULL,
5052 gen_helper_sve_st1ss_be_r,
5053 gen_helper_sve_st1sd_be_r },
5054 { NULL, NULL, NULL,
5055 gen_helper_sve_st1dd_be_r } } },
/* Second half: MTE active (little-endian, then big-endian). */
5057 { { { gen_helper_sve_st1bb_r_mte,
5058 gen_helper_sve_st1bh_r_mte,
5059 gen_helper_sve_st1bs_r_mte,
5060 gen_helper_sve_st1bd_r_mte },
5061 { NULL,
5062 gen_helper_sve_st1hh_le_r_mte,
5063 gen_helper_sve_st1hs_le_r_mte,
5064 gen_helper_sve_st1hd_le_r_mte },
5065 { NULL, NULL,
5066 gen_helper_sve_st1ss_le_r_mte,
5067 gen_helper_sve_st1sd_le_r_mte },
5068 { NULL, NULL, NULL,
5069 gen_helper_sve_st1dd_le_r_mte } },
5070 { { gen_helper_sve_st1bb_r_mte,
5071 gen_helper_sve_st1bh_r_mte,
5072 gen_helper_sve_st1bs_r_mte,
5073 gen_helper_sve_st1bd_r_mte },
5074 { NULL,
5075 gen_helper_sve_st1hh_be_r_mte,
5076 gen_helper_sve_st1hs_be_r_mte,
5077 gen_helper_sve_st1hd_be_r_mte },
5078 { NULL, NULL,
5079 gen_helper_sve_st1ss_be_r_mte,
5080 gen_helper_sve_st1sd_be_r_mte },
5081 { NULL, NULL, NULL,
5082 gen_helper_sve_st1dd_be_r_mte } } },
/* Rows are the st2, st3 and st4 helpers; columns are msz. */
5084 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5085 { { { gen_helper_sve_st2bb_r,
5086 gen_helper_sve_st2hh_le_r,
5087 gen_helper_sve_st2ss_le_r,
5088 gen_helper_sve_st2dd_le_r },
5089 { gen_helper_sve_st3bb_r,
5090 gen_helper_sve_st3hh_le_r,
5091 gen_helper_sve_st3ss_le_r,
5092 gen_helper_sve_st3dd_le_r },
5093 { gen_helper_sve_st4bb_r,
5094 gen_helper_sve_st4hh_le_r,
5095 gen_helper_sve_st4ss_le_r,
5096 gen_helper_sve_st4dd_le_r } },
5097 { { gen_helper_sve_st2bb_r,
5098 gen_helper_sve_st2hh_be_r,
5099 gen_helper_sve_st2ss_be_r,
5100 gen_helper_sve_st2dd_be_r },
5101 { gen_helper_sve_st3bb_r,
5102 gen_helper_sve_st3hh_be_r,
5103 gen_helper_sve_st3ss_be_r,
5104 gen_helper_sve_st3dd_be_r },
5105 { gen_helper_sve_st4bb_r,
5106 gen_helper_sve_st4hh_be_r,
5107 gen_helper_sve_st4ss_be_r,
5108 gen_helper_sve_st4dd_be_r } } },
5109 { { { gen_helper_sve_st2bb_r_mte,
5110 gen_helper_sve_st2hh_le_r_mte,
5111 gen_helper_sve_st2ss_le_r_mte,
5112 gen_helper_sve_st2dd_le_r_mte },
5113 { gen_helper_sve_st3bb_r_mte,
5114 gen_helper_sve_st3hh_le_r_mte,
5115 gen_helper_sve_st3ss_le_r_mte,
5116 gen_helper_sve_st3dd_le_r_mte },
5117 { gen_helper_sve_st4bb_r_mte,
5118 gen_helper_sve_st4hh_le_r_mte,
5119 gen_helper_sve_st4ss_le_r_mte,
5120 gen_helper_sve_st4dd_le_r_mte } },
5121 { { gen_helper_sve_st2bb_r_mte,
5122 gen_helper_sve_st2hh_be_r_mte,
5123 gen_helper_sve_st2ss_be_r_mte,
5124 gen_helper_sve_st2dd_be_r_mte },
5125 { gen_helper_sve_st3bb_r_mte,
5126 gen_helper_sve_st3hh_be_r_mte,
5127 gen_helper_sve_st3ss_be_r_mte,
5128 gen_helper_sve_st3dd_be_r_mte },
5129 { gen_helper_sve_st4bb_r_mte,
5130 gen_helper_sve_st4hh_be_r_mte,
5131 gen_helper_sve_st4ss_be_r_mte,
5132 gen_helper_sve_st4dd_be_r_mte } } },
5134 gen_helper_gvec_mem *fn;
5135 int be = s->be_data == MO_BE;
5137 if (nreg == 0) {
5138 /* ST1 */
5139 fn = fn_single[s->mte_active[0]][be][msz][esz];
/* do_mem_zpa() is passed a register count, so ST1 becomes 1 here. */
5140 nreg = 1;
5141 } else {
5142 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5143 assert(msz == esz);
5144 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
/* A NULL entry means the caller passed an msz/esz pair with no helper. */
5146 assert(fn != NULL);
5147 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
5150 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5152 if (a->rm == 31 || a->msz > a->esz) {
5153 return false;
5155 if (sve_access_check(s)) {
5156 TCGv_i64 addr = new_tmp_a64(s);
5157 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5158 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5159 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5161 return true;
5164 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5166 if (a->msz > a->esz) {
5167 return false;
5169 if (sve_access_check(s)) {
5170 int vsz = vec_full_reg_size(s);
5171 int elements = vsz >> a->esz;
5172 TCGv_i64 addr = new_tmp_a64(s);
5174 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5175 (a->imm * elements * (a->nreg + 1)) << a->msz);
5176 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5178 return true;
5182 *** SVE gather loads / scatter stores
5185 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5186 int scale, TCGv_i64 scalar, int msz, bool is_write,
5187 gen_helper_gvec_mem_scatter *fn)
5189 unsigned vsz = vec_full_reg_size(s);
5190 TCGv_ptr t_zm = tcg_temp_new_ptr();
5191 TCGv_ptr t_pg = tcg_temp_new_ptr();
5192 TCGv_ptr t_zt = tcg_temp_new_ptr();
5193 TCGv_i32 t_desc;
5194 int desc = 0;
5196 if (s->mte_active[0]) {
5197 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5198 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5199 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5200 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5201 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
5202 desc <<= SVE_MTEDESC_SHIFT;
5204 desc = simd_desc(vsz, vsz, desc | scale);
5205 t_desc = tcg_const_i32(desc);
5207 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5208 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5209 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5210 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5212 tcg_temp_free_ptr(t_zt);
5213 tcg_temp_free_ptr(t_zm);
5214 tcg_temp_free_ptr(t_pg);
5215 tcg_temp_free_i32(t_desc);
/*
 * Gather-load helpers used when a->esz == MO_32 (see trans_LD1_zprz and
 * trans_LD1_zpiz).  Within each innermost pair the first row is the
 * u == 0 set and the second the u == 1 set; the u == 0 row has no
 * msz == 2 helper, so that slot is NULL.  Both callers assert that the
 * selected entry is non-NULL.
 */
5218 /* Indexed by [mte][be][ff][xs][u][msz]. */
5219 static gen_helper_gvec_mem_scatter * const
5220 gather_load_fn32[2][2][2][2][2][3] = {
5221 { /* MTE Inactive */
5222 { /* Little-endian */
5223 { { { gen_helper_sve_ldbss_zsu,
5224 gen_helper_sve_ldhss_le_zsu,
5225 NULL, },
5226 { gen_helper_sve_ldbsu_zsu,
5227 gen_helper_sve_ldhsu_le_zsu,
5228 gen_helper_sve_ldss_le_zsu, } },
5229 { { gen_helper_sve_ldbss_zss,
5230 gen_helper_sve_ldhss_le_zss,
5231 NULL, },
5232 { gen_helper_sve_ldbsu_zss,
5233 gen_helper_sve_ldhsu_le_zss,
5234 gen_helper_sve_ldss_le_zss, } } },
5236 /* First-fault */
5237 { { { gen_helper_sve_ldffbss_zsu,
5238 gen_helper_sve_ldffhss_le_zsu,
5239 NULL, },
5240 { gen_helper_sve_ldffbsu_zsu,
5241 gen_helper_sve_ldffhsu_le_zsu,
5242 gen_helper_sve_ldffss_le_zsu, } },
5243 { { gen_helper_sve_ldffbss_zss,
5244 gen_helper_sve_ldffhss_le_zss,
5245 NULL, },
5246 { gen_helper_sve_ldffbsu_zss,
5247 gen_helper_sve_ldffhsu_le_zss,
5248 gen_helper_sve_ldffss_le_zss, } } } },
5250 { /* Big-endian */
5251 { { { gen_helper_sve_ldbss_zsu,
5252 gen_helper_sve_ldhss_be_zsu,
5253 NULL, },
5254 { gen_helper_sve_ldbsu_zsu,
5255 gen_helper_sve_ldhsu_be_zsu,
5256 gen_helper_sve_ldss_be_zsu, } },
5257 { { gen_helper_sve_ldbss_zss,
5258 gen_helper_sve_ldhss_be_zss,
5259 NULL, },
5260 { gen_helper_sve_ldbsu_zss,
5261 gen_helper_sve_ldhsu_be_zss,
5262 gen_helper_sve_ldss_be_zss, } } },
5264 /* First-fault */
5265 { { { gen_helper_sve_ldffbss_zsu,
5266 gen_helper_sve_ldffhss_be_zsu,
5267 NULL, },
5268 { gen_helper_sve_ldffbsu_zsu,
5269 gen_helper_sve_ldffhsu_be_zsu,
5270 gen_helper_sve_ldffss_be_zsu, } },
5271 { { gen_helper_sve_ldffbss_zss,
5272 gen_helper_sve_ldffhss_be_zss,
5273 NULL, },
5274 { gen_helper_sve_ldffbsu_zss,
5275 gen_helper_sve_ldffhsu_be_zss,
5276 gen_helper_sve_ldffss_be_zss, } } } } },
5277 { /* MTE Active */
5278 { /* Little-endian */
5279 { { { gen_helper_sve_ldbss_zsu_mte,
5280 gen_helper_sve_ldhss_le_zsu_mte,
5281 NULL, },
5282 { gen_helper_sve_ldbsu_zsu_mte,
5283 gen_helper_sve_ldhsu_le_zsu_mte,
5284 gen_helper_sve_ldss_le_zsu_mte, } },
5285 { { gen_helper_sve_ldbss_zss_mte,
5286 gen_helper_sve_ldhss_le_zss_mte,
5287 NULL, },
5288 { gen_helper_sve_ldbsu_zss_mte,
5289 gen_helper_sve_ldhsu_le_zss_mte,
5290 gen_helper_sve_ldss_le_zss_mte, } } },
5292 /* First-fault */
5293 { { { gen_helper_sve_ldffbss_zsu_mte,
5294 gen_helper_sve_ldffhss_le_zsu_mte,
5295 NULL, },
5296 { gen_helper_sve_ldffbsu_zsu_mte,
5297 gen_helper_sve_ldffhsu_le_zsu_mte,
5298 gen_helper_sve_ldffss_le_zsu_mte, } },
5299 { { gen_helper_sve_ldffbss_zss_mte,
5300 gen_helper_sve_ldffhss_le_zss_mte,
5301 NULL, },
5302 { gen_helper_sve_ldffbsu_zss_mte,
5303 gen_helper_sve_ldffhsu_le_zss_mte,
5304 gen_helper_sve_ldffss_le_zss_mte, } } } },
5306 { /* Big-endian */
5307 { { { gen_helper_sve_ldbss_zsu_mte,
5308 gen_helper_sve_ldhss_be_zsu_mte,
5309 NULL, },
5310 { gen_helper_sve_ldbsu_zsu_mte,
5311 gen_helper_sve_ldhsu_be_zsu_mte,
5312 gen_helper_sve_ldss_be_zsu_mte, } },
5313 { { gen_helper_sve_ldbss_zss_mte,
5314 gen_helper_sve_ldhss_be_zss_mte,
5315 NULL, },
5316 { gen_helper_sve_ldbsu_zss_mte,
5317 gen_helper_sve_ldhsu_be_zss_mte,
5318 gen_helper_sve_ldss_be_zss_mte, } } },
5320 /* First-fault */
5321 { { { gen_helper_sve_ldffbss_zsu_mte,
5322 gen_helper_sve_ldffhss_be_zsu_mte,
5323 NULL, },
5324 { gen_helper_sve_ldffbsu_zsu_mte,
5325 gen_helper_sve_ldffhsu_be_zsu_mte,
5326 gen_helper_sve_ldffss_be_zsu_mte, } },
5327 { { gen_helper_sve_ldffbss_zss_mte,
5328 gen_helper_sve_ldffhss_be_zss_mte,
5329 NULL, },
5330 { gen_helper_sve_ldffbsu_zss_mte,
5331 gen_helper_sve_ldffhsu_be_zss_mte,
5332 gen_helper_sve_ldffss_be_zss_mte, } } } } },
/*
 * Gather-load helpers used when a->esz == MO_64, indexed by
 * [mte][be][ff][xs][u][msz] like gather_load_fn32 but with a third xs
 * row (the *_zd helpers) and a fourth msz column.  trans_LD1_zpiz
 * always selects xs == 2.  The u == 0 rows have no msz == 3 helper, so
 * that slot is NULL; callers assert the selected entry is non-NULL.
 */
5335 /* Note that we overload xs=2 to indicate 64-bit offset. */
5336 static gen_helper_gvec_mem_scatter * const
5337 gather_load_fn64[2][2][2][3][2][4] = {
5338 { /* MTE Inactive */
5339 { /* Little-endian */
5340 { { { gen_helper_sve_ldbds_zsu,
5341 gen_helper_sve_ldhds_le_zsu,
5342 gen_helper_sve_ldsds_le_zsu,
5343 NULL, },
5344 { gen_helper_sve_ldbdu_zsu,
5345 gen_helper_sve_ldhdu_le_zsu,
5346 gen_helper_sve_ldsdu_le_zsu,
5347 gen_helper_sve_lddd_le_zsu, } },
5348 { { gen_helper_sve_ldbds_zss,
5349 gen_helper_sve_ldhds_le_zss,
5350 gen_helper_sve_ldsds_le_zss,
5351 NULL, },
5352 { gen_helper_sve_ldbdu_zss,
5353 gen_helper_sve_ldhdu_le_zss,
5354 gen_helper_sve_ldsdu_le_zss,
5355 gen_helper_sve_lddd_le_zss, } },
5356 { { gen_helper_sve_ldbds_zd,
5357 gen_helper_sve_ldhds_le_zd,
5358 gen_helper_sve_ldsds_le_zd,
5359 NULL, },
5360 { gen_helper_sve_ldbdu_zd,
5361 gen_helper_sve_ldhdu_le_zd,
5362 gen_helper_sve_ldsdu_le_zd,
5363 gen_helper_sve_lddd_le_zd, } } },
5365 /* First-fault */
5366 { { { gen_helper_sve_ldffbds_zsu,
5367 gen_helper_sve_ldffhds_le_zsu,
5368 gen_helper_sve_ldffsds_le_zsu,
5369 NULL, },
5370 { gen_helper_sve_ldffbdu_zsu,
5371 gen_helper_sve_ldffhdu_le_zsu,
5372 gen_helper_sve_ldffsdu_le_zsu,
5373 gen_helper_sve_ldffdd_le_zsu, } },
5374 { { gen_helper_sve_ldffbds_zss,
5375 gen_helper_sve_ldffhds_le_zss,
5376 gen_helper_sve_ldffsds_le_zss,
5377 NULL, },
5378 { gen_helper_sve_ldffbdu_zss,
5379 gen_helper_sve_ldffhdu_le_zss,
5380 gen_helper_sve_ldffsdu_le_zss,
5381 gen_helper_sve_ldffdd_le_zss, } },
5382 { { gen_helper_sve_ldffbds_zd,
5383 gen_helper_sve_ldffhds_le_zd,
5384 gen_helper_sve_ldffsds_le_zd,
5385 NULL, },
5386 { gen_helper_sve_ldffbdu_zd,
5387 gen_helper_sve_ldffhdu_le_zd,
5388 gen_helper_sve_ldffsdu_le_zd,
5389 gen_helper_sve_ldffdd_le_zd, } } } },
5390 { /* Big-endian */
5391 { { { gen_helper_sve_ldbds_zsu,
5392 gen_helper_sve_ldhds_be_zsu,
5393 gen_helper_sve_ldsds_be_zsu,
5394 NULL, },
5395 { gen_helper_sve_ldbdu_zsu,
5396 gen_helper_sve_ldhdu_be_zsu,
5397 gen_helper_sve_ldsdu_be_zsu,
5398 gen_helper_sve_lddd_be_zsu, } },
5399 { { gen_helper_sve_ldbds_zss,
5400 gen_helper_sve_ldhds_be_zss,
5401 gen_helper_sve_ldsds_be_zss,
5402 NULL, },
5403 { gen_helper_sve_ldbdu_zss,
5404 gen_helper_sve_ldhdu_be_zss,
5405 gen_helper_sve_ldsdu_be_zss,
5406 gen_helper_sve_lddd_be_zss, } },
5407 { { gen_helper_sve_ldbds_zd,
5408 gen_helper_sve_ldhds_be_zd,
5409 gen_helper_sve_ldsds_be_zd,
5410 NULL, },
5411 { gen_helper_sve_ldbdu_zd,
5412 gen_helper_sve_ldhdu_be_zd,
5413 gen_helper_sve_ldsdu_be_zd,
5414 gen_helper_sve_lddd_be_zd, } } },
5416 /* First-fault */
5417 { { { gen_helper_sve_ldffbds_zsu,
5418 gen_helper_sve_ldffhds_be_zsu,
5419 gen_helper_sve_ldffsds_be_zsu,
5420 NULL, },
5421 { gen_helper_sve_ldffbdu_zsu,
5422 gen_helper_sve_ldffhdu_be_zsu,
5423 gen_helper_sve_ldffsdu_be_zsu,
5424 gen_helper_sve_ldffdd_be_zsu, } },
5425 { { gen_helper_sve_ldffbds_zss,
5426 gen_helper_sve_ldffhds_be_zss,
5427 gen_helper_sve_ldffsds_be_zss,
5428 NULL, },
5429 { gen_helper_sve_ldffbdu_zss,
5430 gen_helper_sve_ldffhdu_be_zss,
5431 gen_helper_sve_ldffsdu_be_zss,
5432 gen_helper_sve_ldffdd_be_zss, } },
5433 { { gen_helper_sve_ldffbds_zd,
5434 gen_helper_sve_ldffhds_be_zd,
5435 gen_helper_sve_ldffsds_be_zd,
5436 NULL, },
5437 { gen_helper_sve_ldffbdu_zd,
5438 gen_helper_sve_ldffhdu_be_zd,
5439 gen_helper_sve_ldffsdu_be_zd,
5440 gen_helper_sve_ldffdd_be_zd, } } } } },
5441 { /* MTE Active */
5442 { /* Little-endian */
5443 { { { gen_helper_sve_ldbds_zsu_mte,
5444 gen_helper_sve_ldhds_le_zsu_mte,
5445 gen_helper_sve_ldsds_le_zsu_mte,
5446 NULL, },
5447 { gen_helper_sve_ldbdu_zsu_mte,
5448 gen_helper_sve_ldhdu_le_zsu_mte,
5449 gen_helper_sve_ldsdu_le_zsu_mte,
5450 gen_helper_sve_lddd_le_zsu_mte, } },
5451 { { gen_helper_sve_ldbds_zss_mte,
5452 gen_helper_sve_ldhds_le_zss_mte,
5453 gen_helper_sve_ldsds_le_zss_mte,
5454 NULL, },
5455 { gen_helper_sve_ldbdu_zss_mte,
5456 gen_helper_sve_ldhdu_le_zss_mte,
5457 gen_helper_sve_ldsdu_le_zss_mte,
5458 gen_helper_sve_lddd_le_zss_mte, } },
5459 { { gen_helper_sve_ldbds_zd_mte,
5460 gen_helper_sve_ldhds_le_zd_mte,
5461 gen_helper_sve_ldsds_le_zd_mte,
5462 NULL, },
5463 { gen_helper_sve_ldbdu_zd_mte,
5464 gen_helper_sve_ldhdu_le_zd_mte,
5465 gen_helper_sve_ldsdu_le_zd_mte,
5466 gen_helper_sve_lddd_le_zd_mte, } } },
5468 /* First-fault */
5469 { { { gen_helper_sve_ldffbds_zsu_mte,
5470 gen_helper_sve_ldffhds_le_zsu_mte,
5471 gen_helper_sve_ldffsds_le_zsu_mte,
5472 NULL, },
5473 { gen_helper_sve_ldffbdu_zsu_mte,
5474 gen_helper_sve_ldffhdu_le_zsu_mte,
5475 gen_helper_sve_ldffsdu_le_zsu_mte,
5476 gen_helper_sve_ldffdd_le_zsu_mte, } },
5477 { { gen_helper_sve_ldffbds_zss_mte,
5478 gen_helper_sve_ldffhds_le_zss_mte,
5479 gen_helper_sve_ldffsds_le_zss_mte,
5480 NULL, },
5481 { gen_helper_sve_ldffbdu_zss_mte,
5482 gen_helper_sve_ldffhdu_le_zss_mte,
5483 gen_helper_sve_ldffsdu_le_zss_mte,
5484 gen_helper_sve_ldffdd_le_zss_mte, } },
5485 { { gen_helper_sve_ldffbds_zd_mte,
5486 gen_helper_sve_ldffhds_le_zd_mte,
5487 gen_helper_sve_ldffsds_le_zd_mte,
5488 NULL, },
5489 { gen_helper_sve_ldffbdu_zd_mte,
5490 gen_helper_sve_ldffhdu_le_zd_mte,
5491 gen_helper_sve_ldffsdu_le_zd_mte,
5492 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5493 { /* Big-endian */
5494 { { { gen_helper_sve_ldbds_zsu_mte,
5495 gen_helper_sve_ldhds_be_zsu_mte,
5496 gen_helper_sve_ldsds_be_zsu_mte,
5497 NULL, },
5498 { gen_helper_sve_ldbdu_zsu_mte,
5499 gen_helper_sve_ldhdu_be_zsu_mte,
5500 gen_helper_sve_ldsdu_be_zsu_mte,
5501 gen_helper_sve_lddd_be_zsu_mte, } },
5502 { { gen_helper_sve_ldbds_zss_mte,
5503 gen_helper_sve_ldhds_be_zss_mte,
5504 gen_helper_sve_ldsds_be_zss_mte,
5505 NULL, },
5506 { gen_helper_sve_ldbdu_zss_mte,
5507 gen_helper_sve_ldhdu_be_zss_mte,
5508 gen_helper_sve_ldsdu_be_zss_mte,
5509 gen_helper_sve_lddd_be_zss_mte, } },
5510 { { gen_helper_sve_ldbds_zd_mte,
5511 gen_helper_sve_ldhds_be_zd_mte,
5512 gen_helper_sve_ldsds_be_zd_mte,
5513 NULL, },
5514 { gen_helper_sve_ldbdu_zd_mte,
5515 gen_helper_sve_ldhdu_be_zd_mte,
5516 gen_helper_sve_ldsdu_be_zd_mte,
5517 gen_helper_sve_lddd_be_zd_mte, } } },
5519 /* First-fault */
5520 { { { gen_helper_sve_ldffbds_zsu_mte,
5521 gen_helper_sve_ldffhds_be_zsu_mte,
5522 gen_helper_sve_ldffsds_be_zsu_mte,
5523 NULL, },
5524 { gen_helper_sve_ldffbdu_zsu_mte,
5525 gen_helper_sve_ldffhdu_be_zsu_mte,
5526 gen_helper_sve_ldffsdu_be_zsu_mte,
5527 gen_helper_sve_ldffdd_be_zsu_mte, } },
5528 { { gen_helper_sve_ldffbds_zss_mte,
5529 gen_helper_sve_ldffhds_be_zss_mte,
5530 gen_helper_sve_ldffsds_be_zss_mte,
5531 NULL, },
5532 { gen_helper_sve_ldffbdu_zss_mte,
5533 gen_helper_sve_ldffhdu_be_zss_mte,
5534 gen_helper_sve_ldffsdu_be_zss_mte,
5535 gen_helper_sve_ldffdd_be_zss_mte, } },
5536 { { gen_helper_sve_ldffbds_zd_mte,
5537 gen_helper_sve_ldffhds_be_zd_mte,
5538 gen_helper_sve_ldffsds_be_zd_mte,
5539 NULL, },
5540 { gen_helper_sve_ldffbdu_zd_mte,
5541 gen_helper_sve_ldffhdu_be_zd_mte,
5542 gen_helper_sve_ldffsdu_be_zd_mte,
5543 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
5546 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5548 gen_helper_gvec_mem_scatter *fn = NULL;
5549 bool be = s->be_data == MO_BE;
5550 bool mte = s->mte_active[0];
5552 if (!sve_access_check(s)) {
5553 return true;
5556 switch (a->esz) {
5557 case MO_32:
5558 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
5559 break;
5560 case MO_64:
5561 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
5562 break;
5564 assert(fn != NULL);
5566 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5567 cpu_reg_sp(s, a->rn), a->msz, false, fn);
5568 return true;
5571 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5573 gen_helper_gvec_mem_scatter *fn = NULL;
5574 bool be = s->be_data == MO_BE;
5575 bool mte = s->mte_active[0];
5576 TCGv_i64 imm;
5578 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5579 return false;
5581 if (!sve_access_check(s)) {
5582 return true;
5585 switch (a->esz) {
5586 case MO_32:
5587 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
5588 break;
5589 case MO_64:
5590 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
5591 break;
5593 assert(fn != NULL);
5595 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5596 * by loading the immediate into the scalar parameter.
5598 imm = tcg_const_i64(a->imm << a->msz);
5599 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
5600 tcg_temp_free_i64(imm);
5601 return true;
/*
 * Scatter-store helpers used when a->esz == MO_32 (see trans_ST1_zprz
 * and trans_ST1_zpiz; the latter always selects xs == 0).
 */
5604 /* Indexed by [mte][be][xs][msz]. */
5605 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5606 { /* MTE Inactive */
5607 { /* Little-endian */
5608 { gen_helper_sve_stbs_zsu,
5609 gen_helper_sve_sths_le_zsu,
5610 gen_helper_sve_stss_le_zsu, },
5611 { gen_helper_sve_stbs_zss,
5612 gen_helper_sve_sths_le_zss,
5613 gen_helper_sve_stss_le_zss, } },
5614 { /* Big-endian */
5615 { gen_helper_sve_stbs_zsu,
5616 gen_helper_sve_sths_be_zsu,
5617 gen_helper_sve_stss_be_zsu, },
5618 { gen_helper_sve_stbs_zss,
5619 gen_helper_sve_sths_be_zss,
5620 gen_helper_sve_stss_be_zss, } } },
5621 { /* MTE Active */
5622 { /* Little-endian */
5623 { gen_helper_sve_stbs_zsu_mte,
5624 gen_helper_sve_sths_le_zsu_mte,
5625 gen_helper_sve_stss_le_zsu_mte, },
5626 { gen_helper_sve_stbs_zss_mte,
5627 gen_helper_sve_sths_le_zss_mte,
5628 gen_helper_sve_stss_le_zss_mte, } },
5629 { /* Big-endian */
5630 { gen_helper_sve_stbs_zsu_mte,
5631 gen_helper_sve_sths_be_zsu_mte,
5632 gen_helper_sve_stss_be_zsu_mte, },
5633 { gen_helper_sve_stbs_zss_mte,
5634 gen_helper_sve_sths_be_zss_mte,
5635 gen_helper_sve_stss_be_zss_mte, } } },
/*
 * Scatter-store helpers used when a->esz == MO_64, indexed by
 * [mte][be][xs][msz].  trans_ST1_zpiz always selects xs == 2.
 */
5638 /* Note that we overload xs=2 to indicate 64-bit offset. */
5639 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5640 { /* MTE Inactive */
5641 { /* Little-endian */
5642 { gen_helper_sve_stbd_zsu,
5643 gen_helper_sve_sthd_le_zsu,
5644 gen_helper_sve_stsd_le_zsu,
5645 gen_helper_sve_stdd_le_zsu, },
5646 { gen_helper_sve_stbd_zss,
5647 gen_helper_sve_sthd_le_zss,
5648 gen_helper_sve_stsd_le_zss,
5649 gen_helper_sve_stdd_le_zss, },
5650 { gen_helper_sve_stbd_zd,
5651 gen_helper_sve_sthd_le_zd,
5652 gen_helper_sve_stsd_le_zd,
5653 gen_helper_sve_stdd_le_zd, } },
5654 { /* Big-endian */
5655 { gen_helper_sve_stbd_zsu,
5656 gen_helper_sve_sthd_be_zsu,
5657 gen_helper_sve_stsd_be_zsu,
5658 gen_helper_sve_stdd_be_zsu, },
5659 { gen_helper_sve_stbd_zss,
5660 gen_helper_sve_sthd_be_zss,
5661 gen_helper_sve_stsd_be_zss,
5662 gen_helper_sve_stdd_be_zss, },
5663 { gen_helper_sve_stbd_zd,
5664 gen_helper_sve_sthd_be_zd,
5665 gen_helper_sve_stsd_be_zd,
5666 gen_helper_sve_stdd_be_zd, } } },
5667 { /* MTE Active */
5668 { /* Little-endian */
5669 { gen_helper_sve_stbd_zsu_mte,
5670 gen_helper_sve_sthd_le_zsu_mte,
5671 gen_helper_sve_stsd_le_zsu_mte,
5672 gen_helper_sve_stdd_le_zsu_mte, },
5673 { gen_helper_sve_stbd_zss_mte,
5674 gen_helper_sve_sthd_le_zss_mte,
5675 gen_helper_sve_stsd_le_zss_mte,
5676 gen_helper_sve_stdd_le_zss_mte, },
5677 { gen_helper_sve_stbd_zd_mte,
5678 gen_helper_sve_sthd_le_zd_mte,
5679 gen_helper_sve_stsd_le_zd_mte,
5680 gen_helper_sve_stdd_le_zd_mte, } },
5681 { /* Big-endian */
5682 { gen_helper_sve_stbd_zsu_mte,
5683 gen_helper_sve_sthd_be_zsu_mte,
5684 gen_helper_sve_stsd_be_zsu_mte,
5685 gen_helper_sve_stdd_be_zsu_mte, },
5686 { gen_helper_sve_stbd_zss_mte,
5687 gen_helper_sve_sthd_be_zss_mte,
5688 gen_helper_sve_stsd_be_zss_mte,
5689 gen_helper_sve_stdd_be_zss_mte, },
5690 { gen_helper_sve_stbd_zd_mte,
5691 gen_helper_sve_sthd_be_zd_mte,
5692 gen_helper_sve_stsd_be_zd_mte,
5693 gen_helper_sve_stdd_be_zd_mte, } } },
5696 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5698 gen_helper_gvec_mem_scatter *fn;
5699 bool be = s->be_data == MO_BE;
5700 bool mte = s->mte_active[0];
5702 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5703 return false;
5705 if (!sve_access_check(s)) {
5706 return true;
5708 switch (a->esz) {
5709 case MO_32:
5710 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
5711 break;
5712 case MO_64:
5713 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
5714 break;
5715 default:
5716 g_assert_not_reached();
5718 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5719 cpu_reg_sp(s, a->rn), a->msz, true, fn);
5720 return true;
5723 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5725 gen_helper_gvec_mem_scatter *fn = NULL;
5726 bool be = s->be_data == MO_BE;
5727 bool mte = s->mte_active[0];
5728 TCGv_i64 imm;
5730 if (a->esz < a->msz) {
5731 return false;
5733 if (!sve_access_check(s)) {
5734 return true;
5737 switch (a->esz) {
5738 case MO_32:
5739 fn = scatter_store_fn32[mte][be][0][a->msz];
5740 break;
5741 case MO_64:
5742 fn = scatter_store_fn64[mte][be][2][a->msz];
5743 break;
5745 assert(fn != NULL);
5747 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5748 * by loading the immediate into the scalar parameter.
5750 imm = tcg_const_i64(a->imm << a->msz);
5751 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
5752 tcg_temp_free_i64(imm);
5753 return true;
5757 * Prefetches
5760 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5762 /* Prefetch is a nop within QEMU. */
5763 (void)sve_access_check(s);
5764 return true;
5767 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5769 if (a->rm == 31) {
5770 return false;
5772 /* Prefetch is a nop within QEMU. */
5773 (void)sve_access_check(s);
5774 return true;
5778 * Move Prefix
5780 * TODO: The implementation so far could handle predicated merging movprfx.
5781 * The helper functions as written take an extra source register to
5782 * use in the operation, but the result is only written when predication
5783 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5784 * to allow the final write back to the destination to be unconditional.
5785 * For predicated zeroing movprfx, we need to rearrange the helpers to
5786 * allow the final write back to zero inactives.
5788 * In the meantime, just emit the moves.
5791 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
5793 return do_mov_z(s, a->rd, a->rn);
5796 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5798 if (sve_access_check(s)) {
5799 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5801 return true;
5804 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5806 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);